Skip to content

Instantly share code, notes, and snippets.

@GHolk
Last active December 2, 2022 10:12
Show Gist options
  • Save GHolk/3f245c6e63a02597640e24b7232369ee to your computer and use it in GitHub Desktop.
Save GHolk/3f245c6e63a02597640e24b7232369ee to your computer and use it in GitHub Desktop.
/**
 * Batch downloader for Google web-cache copies of wiki.komica.org pages.
 *
 * NOTE(review): the code relies on the globals `tri` (`tri.excmds`,
 * `tri.native`, `tri.browserBg`, `tri.state`), `browser`, `$`, `$all` and
 * `$ex`, which are not defined here — presumably it is evaluated inside a
 * Tridactyl page/background context; confirm before reusing elsewhere.
 *
 * `var` is kept on purpose so that re-evaluating the script replaces the
 * previous object instead of throwing a redeclaration error.
 */
var downloadJob = tri.downloadJob = {
  /** WebExtension API object; replaced by `tri.browserBg` in `init()` when that exists. */
  browser: browser,
  /** "linux" or "windows"; selects the shell command used by `fileExist()`. */
  os: "linux",
  /** True once `init()` has seen a native messenger version. */
  native: false,
  /** Sub-directory (inside the Downloads folder) that files are written to. */
  downloadPath: "kwk",
  /** Set to true from outside to make the `dlax()` loop exit before its next item. */
  stop: false,
  ex: tri.excmds,

  /**
   * Records the tab/window/cookie-store to work in and probes the environment.
   *
   * @param {?number} id - Tab id to bind to; when null the active tab of the
   *   current window is used.
   */
  async init(id = null) {
    if (tri.browserBg) this.browser = tri.browserBg;

    let tab;
    if (id != null) {
      tab = await this.browser.tabs.get(id);
    } else {
      [tab] = await this.browser.tabs.query({
        active: true,
        currentWindow: true,
      });
    }

    this.tabId = tab.id;
    this.windowId = tab.windowId;
    this.cookieStoreId = tab.cookieStoreId;
    this.domParser = new DOMParser();
    this.native = Boolean(await tri.native.getNativeMessengerVersion());
    if (/Windows NT/i.test(navigator.userAgent)) this.os = "windows";
  },

  /**
   * Starts a browser download.
   *
   * @param {string} url - Address to fetch.
   * @param {string} file - Target file name passed as `filename`.
   * @returns {Promise<{id: number, url: string, file: string}>}
   * @throws The original error, tagged with `codeName = "download-error"`,
   *   when `downloads.download` rejects.
   */
  async download(url, file) {
    let id;
    try {
      id = await this.browser.downloads.download({
        filename: file,
        url: url,
        cookieStoreId: this.cookieStoreId,
      });
    } catch (fail) {
      // Tag the error so dlax() can tell download failures from programming errors.
      fail.codeName = "download-error";
      throw fail;
    }
    return { id: id, url: url, file: file };
  },

  /**
   * Derives a file-system-safe page title from a wiki URL.
   *
   * @param {{pathname: string, searchParams?: URLSearchParams}} url - A `URL`,
   *   or a plain object with `pathname` (as `dlar()` passes).
   * @returns {string} Title with spaces turned into `_` and each of
   *   `* " ' . ? / :` replaced by `%` plus its upper-case hex char code.
   */
  getTitle(url) {
    const path = url.pathname;
    let title;
    if (/index\.php/.test(path)) {
      title = url.searchParams.get("title");
    } else {
      title = decodeURIComponent(path.slice(1));
    }
    return title
      .replace(/ /g, "_")
      .replace(/[*"'.?/:]/g, c => "%" + c.charCodeAt(0).toString(16).toUpperCase());
  },

  /**
   * Downloads the web-cache copy linked next to a search-result anchor.
   *
   * @param {HTMLAnchorElement} a - Result anchor; the cache link is looked up
   *   inside its next element sibling.
   * @returns {Promise<object|string>} The `download()` result, `"no-cache"`
   *   when no cache link is found, or `"skip"` when the file already exists.
   */
  async downloadFromAnchor(a) {
    const url = new URL(a.href);
    const file = this.getTitle(url) + ".html";

    // A result without a following sibling simply has no cache link.
    const sibling = a.nextElementSibling;
    const cacheNode = sibling ? sibling.querySelector('a[href^="https://webcache"]') : null;
    if (!cacheNode) return "no-cache";

    if (this.native && await this.fileExist(file)) return "skip";

    return await this.download(cacheNode.href, `${this.downloadPath}/${file}`);
  },

  /**
   * Saves a cleaned snapshot of the current document as "<title>.html" by
   * clicking a temporary download link.
   *
   * The snapshot has extension iframes removed, an absolute `<base>` added
   * (or made absolute) and, for non-UTF-8 documents, its charset declaration
   * rewritten to UTF-8; original values are kept in `data-gholk-*` attributes.
   */
  downloadCurrentHtml() {
    /** Serialises a doctype node back to its source form (empty string if absent). */
    function doctypeToString(node = document.doctype) {
      if (!node) return "";
      const publicPart = node.publicId ? ` PUBLIC "${node.publicId}"` : "";
      const systemKeyword = !node.publicId && node.systemId ? " SYSTEM" : "";
      const systemPart = node.systemId ? ` "${node.systemId}"` : "";
      return "<!DOCTYPE " + node.name + publicPart + systemKeyword + systemPart + ">" + "\n";
    }

    /** Deep-clones `root` and applies the clean-ups described above to the clone. */
    function cleanCopy(root) {
      const copy = root.cloneNode(true);
      copy.querySelectorAll('iframe[src^="moz-extension"]').forEach(e => e.remove());
      fixRelativeUrl(copy);
      fixEncode(copy);
      return copy;
    }

    /** Makes the clone declare UTF-8, since the Blob below is written as UTF-8. */
    function fixEncode(root) {
      if (document.characterSet === "UTF-8") return;
      const list = root.querySelectorAll(
        "meta[http-equiv=content-type]," + "meta[http-equiv=Content-Type]," + "meta[charset]"
      );
      if (list.length === 0) {
        if (!confirm("not UTF-8 and no charset tag found, add one?")) return;
        const encodeNode = document.createElement("meta");
        encodeNode.setAttribute("charset", "utf-8");
        encodeNode.dataset.gholkOriginalCharset = "";
        const head = root.querySelector("head");
        if (head) head.prepend(encodeNode);
        else root.prepend(encodeNode);
        return;
      }
      list.forEach(encodeNode => {
        if (encodeNode.hasAttribute("charset")) {
          encodeNode.dataset.gholkOriginalCharset = encodeNode.getAttribute("charset");
          encodeNode.setAttribute("charset", "utf-8");
        } else if (encodeNode.hasAttribute("http-equiv")) {
          encodeNode.dataset.gholkOriginalContentType = encodeNode.content;
          encodeNode.content = "text/html; charset=UTF-8";
        } else {
          alert(`unknown error while fix encode node: ${encodeNode.outerHTML}`);
        }
      });
    }

    /** Ensures the clone has a `<base>` with an absolute href. */
    function fixRelativeUrl(root) {
      let base = root.querySelector("base");
      if (base) {
        base.dataset.gholkOriginalHref = base.getAttribute("href");
        // `base.href` is the resolved form of the attribute.
        base.setAttribute("href", base.href);
        return;
      }
      base = document.createElement("base");
      base.href = root.baseURI;
      base.dataset.gholkOriginalHref = "";
      const head = root.querySelector("head");
      if (head) head.prepend(base);
      else root.prepend(base);
    }

    const html = doctypeToString() + cleanCopy(document.documentElement).outerHTML;
    const blob = new Blob([html], { type: "text/html" });
    const objectUrl = URL.createObjectURL(blob);

    const link = document.createElement("a");
    link.download = document.title + ".html";
    link.href = objectUrl;
    document.body.appendChild(link);
    link.click();
    link.remove();

    // revokeObjectURL takes the URL string, not the Blob. Revocation is
    // delayed because releasing the URL right after click() can cancel a
    // download that has not started reading the blob yet.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 4e4);
  },

  /**
   * Downloads the web-cache copy of a wiki page given its path.
   *
   * NOTE(review): the file name here is escaped with `encodeURIComponent`,
   * which leaves `*`, `'` and `.` untouched, so names can differ from the
   * ones `getTitle()` produces for the same page — left as is to keep
   * existing file names stable.
   *
   * @param {string} path - Page path with a leading "/".
   * @returns {Promise<object|string>} The `download()` result, or `"skip"`
   *   when the file already exists.
   */
  async downloadFromPath(path) {
    const title = path.slice(1);
    const url = `https://wiki.komica.org/${title}`;
    const cache = "http://webcache.googleusercontent.com/search?q=cache:" + url;
    this.cacheUrl = cache;
    const file = title.replace(/[*"'.?/:]/g, c => encodeURIComponent(c)) + ".html";
    if (this.native && await this.fileExist(file)) return "skip";
    return await this.download(cache, this.downloadPath + "/" + file);
  },

  /**
   * Checks through a shell command whether `file` is already in the
   * download directory.
   *
   * @param {string} file - File name relative to `downloadPath`.
   * @returns {Promise<boolean>} True when the command printed anything.
   */
  async fileExist(file) {
    const ex = this.ex;
    const safe = await ex.shellescape(file);
    let exist;
    if (this.os === "windows") {
      exist = await ex.exclaim_quiet(`if exist ..\\Downloads\\${this.downloadPath}\\${safe} echo exist`);
    } else {
      exist = await ex.exclaim_quiet(`test -f $HOME/Downloads/${this.downloadPath}/${safe} && echo -n exist`);
    }
    return Boolean(exist);
  },

  /**
   * Looks up several downloads by id in parallel.
   *
   * @param {number[]} list - Download ids.
   * @returns {Promise<object[]>} Flattened `downloads.search` results.
   */
  async queryIdList(list) {
    const queries = list.map(id => this.browser.downloads.search({ id: id }));
    return (await Promise.all(queries)).flat();
  },

  /**
   * Runs `method` over every item of its work list, one download at a time.
   *
   * For a download that ends in neither "complete" nor "in_progress" the
   * cache URL is opened in a background tab; a tab titled "Error 404" is
   * closed automatically, otherwise the user is asked whether to retry.
   *
   * @param {string} method - "downloadFromPath" (list is
   *   `tri.state.komica_wiki_data`) or "downloadFromAnchor" (list is
   *   `this.list`, or the parents of all `a>h3` on the page).
   * @param {number} n - Index of the first item to process.
   * @throws {Error} "unknown method" for any other `method`.
   */
  async dlax(method, n = 0) {
    await this.init();

    let l;
    // Set when some item could not be fetched; for "downloadFromAnchor" this
    // triggers saving the current page at the end.
    let backupGoogleSearch = false;
    if (method === "downloadFromPath") {
      l = tri.state.komica_wiki_data;
    } else if (method === "downloadFromAnchor") {
      l = this.list || $all("a>h3").map(h => h.parentNode);
    } else {
      throw new Error("unknown method");
    }

    // `i` is advanced inside the body so that a retry repeats the same item.
    for (let i = n; i < l.length && !this.stop; ) {
      console.log("line " + i);

      let dl;
      try {
        dl = await this[method](l[i]);
      } catch (error) {
        if (error.codeName !== "download-error") throw error;
        // The download never started, so there is no id or URL to poll.
        // Record the failure and move on instead of crashing on `dl.id`.
        console.error(error);
        backupGoogleSearch = true;
        i++;
        continue;
      }

      if (dl === "skip") {
        console.log("skip");
        i++;
        continue;
      }
      if (dl === "no-cache") {
        backupGoogleSearch = true;
        i++;
        continue;
      }

      let retry = false;
      while (true) {
        const [state] = await this.browser.downloads.search({ id: dl.id });
        if (state.state === "complete") {
          await this.ex.sleep(3e3);
          break;
        }
        if (state.state === "in_progress") {
          await this.ex.sleep(500);
          continue;
        }

        // Any other state: open the URL in a tab to find out why it failed.
        await this.sleep(1);
        const tab = await this.tabopen(dl.url);
        const is404p = this.check404(tab.id);
        const first = await Promise.race([is404p, this.sleep(3).then(x => "timeout")]);
        if (first === "error-404") {
          await this.browser.tabs.remove(tab.id);
          await this.sleep(2);
          backupGoogleSearch = true;
        }
        // "error-404" or "close": nothing left to ask the user about.
        if (first !== "timeout") break;

        retry = await this.tabEval(`confirm('retry ${i}? (or skip)')`);
        if (!retry) {
          try {
            await this.browser.tabs.remove(tab.id);
            backupGoogleSearch = true;
          } catch (error) {
            console.error(error);
            console.log("tab is already closed");
          }
        }
        break;
      }
      if (!retry) i++;
    }

    if (method === "downloadFromAnchor") {
      if (backupGoogleSearch) {
        this.downloadCurrentHtml();
        await this.sleep(1);
      }
      this.confirmNext();
    }
  },

  /** Asks the user whether to continue and, if so, clicks the `#pnnext` element. */
  confirmNext() {
    if (confirm(`next?`)) $("#pnnext").click();
  },

  /**
   * Shorthand for `dlax("downloadFromPath", n)`.
   *
   * @param {number} n - Index of the first list item to process.
   */
  async dlap(n = 0) {
    return await this.dlax("downloadFromPath", n);
  },

  /**
   * @param {number} s - Delay in seconds.
   * @returns {Promise<void>} Resolves after the delay.
   */
  sleep(s) {
    return new Promise(wake => setTimeout(wake, s * 1e3));
  },

  /**
   * Polls a tab until its title starts with "Error 404" or the tab is gone.
   *
   * @param {number} tabId
   * @returns {Promise<string>} "error-404", or "close" when `tabs.get` rejects.
   */
  async check404(tabId) {
    while (true) {
      let tab;
      try {
        tab = await this.browser.tabs.get(tabId);
      } catch (closed) {
        return "close";
      }
      if (/^Error 404/.test(tab.title)) return "error-404";
      await this.ex.sleep(200);
    }
  },

  /**
   * Evaluates `code` locally when `tri.browserBg` exists, otherwise in the
   * tab recorded by `init()`.
   *
   * NOTE(review): this uses `eval`; only call it with trusted, internally
   * built strings.
   *
   * @param {string} code - JavaScript source.
   * @returns {Promise<*>} The evaluation result.
   */
  async tabEval(code) {
    if (tri.browserBg) return eval(code);
    const resultList = await this.browser.tabs.executeScript(this.tabId, { code: code });
    return resultList[0];
  },

  /**
   * Opens `url` in a background tab of the recorded window and cookie store.
   *
   * @param {string} url
   * @returns {Promise<object>} The created tab.
   */
  async tabopen(url) {
    return await this.browser.tabs.create({
      active: false,
      url: url,
      windowId: this.windowId,
      cookieStoreId: this.cookieStoreId,
    });
  },

  /**
   * Runs the anchor-based download over `list`.
   *
   * @param {?Array} list - Anchors to process; when null, `dlax()` collects
   *   them from the page.
   */
  async dlan(list = null) {
    this.list = list;
    // Was misspelled "downlaodFromAnchor", which made dlax() always throw.
    return await this.dlax("downloadFromAnchor");
  },

  /**
   * Downloads from the current result page, then navigates to a search for
   * the next list entry whose file does not exist yet.
   *
   * Replaces `confirmNext` with a no-op on this object.
   *
   * @param {*} n - Currently unused. NOTE(review): possibly meant to be
   *   forwarded to `dlax()` as the start index — confirm.
   */
  async dlar(n) {
    this.confirmNext = () => 0;
    await this.dlax("downloadFromAnchor");

    const u = new URL(location.href);
    const q = u.searchParams.get("q");
    // Drop the first space-separated token of the query (the `site:` part
    // of the URL built at the end of this method).
    const word = q.replace(/^[^ ]* /, "");
    const [list] = await $ex("state_get komica_wiki_data");
    const index = list.indexOf("/" + word.replace(/ /g, "_"));
    if (index === -1) return alert(`not-found-in-list ${word}`);

    let wordNext;
    for (let i = index + 1; i < list.length; i++) {
      const path = list[i];
      // Escape "%" first so getTitle()'s decodeURIComponent leaves the path unchanged.
      const file = this.getTitle({ pathname: path.replace(/%/g, "%25") });
      if (!await this.fileExist(file + ".html")) {
        wordNext = path.slice(1);
        break;
      }
    }
    if (!wordNext) return alert("done");

    wordNext = wordNext.replace(/_/g, " ");
    location.href = `?q=site:wiki.komica.org+${encodeURIComponent(wordNext)}`;
  },

  /**
   * Constructor for a deferred: exposes `promise` together with its
   * `resolve` and `reject` functions.
   */
  Defer: function Defer() {
    this.promise = new Promise((resolve, reject) => {
      this.resolve = resolve;
      this.reject = reject;
    });
  },

  /**
   * Fetches a Google search page for `word` and extracts web-cache URLs from
   * the first inline script that contains one.
   *
   * Requires `init()` to have run (uses `this.domParser`).
   *
   * @param {string} word - Query string, inserted into the URL as given.
   * @returns {Promise<string[]>} Matches with `\xNN` escapes and `&amp;`
   *   decoded (the surrounding quote characters are part of each match);
   *   empty when no script contains a cache URL.
   */
  async keywordToCacheUrl(word) {
    const google = "https://www.google.com/search";
    const res = await fetch(`${google}?q=${word}`);
    const html = await res.text();
    const doc = this.domParser.parseFromString(html, "text/html");
    const scriptList = Array.from(doc.querySelectorAll("script"));

    // Separate non-global regex for detection: `.test()` on a /g regex is
    // stateful through `lastIndex`.
    const detect = /\\x22https?:..webcache.googleusercontent.com.*?\\x22/;
    const extract = /\\x22https?:..webcache.googleusercontent.com.*?\\x22/g;

    const script = scriptList.find(s => detect.test(s.textContent));
    if (!script) return [];

    const urlCodeList = script.textContent.match(extract) || [];
    return urlCodeList.map(code =>
      code
        .replace(/\\x[0-9a-f]{2}/g, c => String.fromCodePoint(parseInt(c.slice(2), 16)))
        .replace(/&amp;/g, "&")
    );
  },

  /**
   * Stores the work list in `tri.state.komica_wiki_data`.
   *
   * @param {string|Array} [x] - Newline-separated string or ready-made list;
   *   when falsy, the clipboard is read through `ex.getclip("clipboard")`.
   */
  async readListFromClipboardToState(x) {
    if (!x) x = await this.ex.getclip("clipboard");
    const l = typeof x === "string" ? x.split("\n") : x;
    tri.state.komica_wiki_data = l;
  },
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment