Last active
December 2, 2022 10:12
-
-
Save GHolk/3f245c6e63a02597640e24b7232369ee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Helpers for bulk-downloading Google web-cache copies of wiki.komica.org
 * pages. The object is published on the global `tri` object (presumably the
 * Tridactyl scripting context — confirm) and relies on its `excmds`,
 * `native`, `state` and optional `browserBg` members.
 *
 * `var` is kept on purpose: the snippet may be evaluated more than once in
 * the same scope, which `const`/`let` would reject.
 */
var downloadJob = tri.downloadJob = {
    // WebExtension API object; replaced by `tri.browserBg` in init() when set.
    browser: browser,
    // "linux" or "windows"; detected from the user agent in init().
    os: "linux",
    // True once init() sees a native messenger version; enables fileExist().
    native: false,
    // Sub-directory of the download folder that receives the files.
    downloadPath: "kwk",
    // Set to true from outside to make dlax() stop before the next item.
    stop: false,
    ex: tri.excmds,

    /**
     * Record the tab to work on and probe the environment.
     *
     * @param {?number} id - tab id; when null the active tab of the current
     *     window is used.
     */
    async init(id = null) {
        if (tri.browserBg) this.browser = tri.browserBg;
        let tab;
        if (id != null) {
            tab = await this.browser.tabs.get(id);
        } else {
            [tab] = await this.browser.tabs.query({
                active: true,
                currentWindow: true
            });
        }
        this.tabId = tab.id;
        this.windowId = tab.windowId;
        this.cookieStoreId = tab.cookieStoreId;
        this.domParser = new DOMParser();
        this.native = Boolean(await tri.native.getNativeMessengerVersion());
        if ((/Windows NT/i).test(navigator.userAgent)) this.os = 'windows';
    },

    /**
     * Start a browser download in the cookie store recorded by init().
     *
     * @param {string} url - address to fetch.
     * @param {string} file - target file name passed as `filename`.
     * @returns {Promise<{id: number, url: string, file: string}>}
     * @throws the rejection of `downloads.download`, tagged with
     *     `codeName = "download-error"` so dlax() can recognise it.
     */
    async download(url, file) {
        let id;
        try {
            id = await this.browser.downloads.download({
                filename: file,
                url: url,
                cookieStoreId: this.cookieStoreId
            });
        } catch (fail) {
            fail.codeName = "download-error";
            throw fail;
        }
        return {
            id: id,
            url: url,
            file: file
        };
    },

    /**
     * Derive a file-system friendly page title from a wiki URL.
     *
     * The title comes from the `title` query parameter for `index.php` style
     * URLs and from the decoded path otherwise. Spaces become `_` and the
     * characters `* " ' . ? / :` become `%XX` (upper-case hex).
     *
     * @param {URL|{pathname: string}} url - a URL, or a plain object with a
     *     `pathname` (as passed by dlar()).
     * @returns {string}
     */
    getTitle(url) {
        const path = url.pathname;
        let title = null;
        // Plain {pathname} objects have no searchParams, and index.php may be
        // requested without a title; both fall back to the path.
        if (/index\.php/.test(path) && url.searchParams) {
            title = url.searchParams.get("title");
        }
        if (title == null) title = decodeURIComponent(path.slice(1));
        return title
            .replace(/ /g, '_')
            .replace(/[*"'.?/:]/g, c => "%" + c.charCodeAt(0).toString(16).toUpperCase());
    },

    /**
     * Download the web-cache copy linked next to a search-result anchor.
     *
     * @param {HTMLAnchorElement} a - result anchor; the cache link is looked
     *     up inside its next element sibling.
     * @returns {Promise<"no-cache"|"skip"|object>} "no-cache" when no cache
     *     link exists, "skip" when the file is already present (native mode
     *     only), otherwise the result of download().
     */
    async downloadFromAnchor(a) {
        const url = new URL(a.href);
        const file = this.getTitle(url) + ".html";
        const sibling = a.nextElementSibling;
        const cacheNode = sibling
            ? sibling.querySelector('a[href^="https://webcache"]')
            : null;
        if (!cacheNode) return "no-cache";
        if (this.native && await this.fileExist(file)) return "skip";
        return await this.download(cacheNode.href, `${this.downloadPath}/${file}`);
    },

    /**
     * Save a cleaned copy of the current document as "<title>.html" through
     * a temporary download link. The copy gets an absolute <base>, a UTF-8
     * charset declaration, and extension iframes removed; original values
     * are kept in `data-gholk-*` attributes.
     */
    downloadCurrentHtml() {
        // Serialise the doctype, which outerHTML does not include.
        function doctypeToString(node = document.doctype) {
            if (!node) return "";
            return "<!DOCTYPE " + node.name + (node.publicId ? ` PUBLIC "${node.publicId}"` : "") + (!node.publicId && node.systemId ? " SYSTEM" : "") + (node.systemId ? ` "${node.systemId}"` : "") + ">" + "\n";
        }
        // Deep-clone the tree so the live page is not modified.
        function cleanCopy(root) {
            const copy = root.cloneNode(true);
            copy.querySelectorAll('iframe[src^="moz-extension"]').forEach(e => e.remove());
            fixRelativeUrl(copy);
            fixEncode(copy);
            return copy;
        }
        // The blob is written as UTF-8, so the declared charset must agree.
        function fixEncode(root) {
            if (document.characterSet == "UTF-8") return;
            const list = root.querySelectorAll("meta[http-equiv=content-type]," + "meta[http-equiv=Content-Type]," + "meta[charset]");
            if (list.length == 0) {
                if (!confirm("not UTF-8 and no charset tag found, add one?")) return;
                const encodeNode = document.createElement("meta");
                encodeNode.setAttribute("charset", "utf-8");
                encodeNode.dataset.gholkOriginalCharset = "";
                const head = root.querySelector("head");
                if (head) head.prepend(encodeNode); else root.prepend(encodeNode);
                return;
            }
            list.forEach(encodeNode => {
                if (encodeNode.hasAttribute("charset")) {
                    const original = encodeNode.getAttribute("charset");
                    encodeNode.dataset.gholkOriginalCharset = original;
                    encodeNode.setAttribute("charset", "utf-8");
                } else if (encodeNode.hasAttribute("http-equiv")) {
                    encodeNode.dataset.gholkOriginalContentType = encodeNode.content;
                    encodeNode.content = "text/html; charset=UTF-8";
                } else alert(`unknown error while fix encode node: ${encodeNode.outerHTML}`);
            });
        }
        // Pin an absolute <base> so relative links still resolve offline.
        function fixRelativeUrl(root) {
            let base = root.querySelector("base");
            if (base) {
                const relative = base.getAttribute("href");
                base.dataset.gholkOriginalHref = relative;
                base.setAttribute("href", base.href);
            } else {
                base = document.createElement("base");
                base.href = root.baseURI;
                base.dataset.gholkOriginalHref = "";
                const head = root.querySelector("head");
                if (head) head.prepend(base); else root.prepend(base);
            }
        }
        const html = doctypeToString() + cleanCopy(document.documentElement).outerHTML;
        const blob = new Blob([ html ], {
            type: "text/html"
        });
        const objectUrl = URL.createObjectURL(blob);
        const download = document.createElement("a");
        download.download = document.title + ".html";
        download.href = objectUrl;
        document.body.appendChild(download);
        download.click();
        download.remove();
        // revokeObjectURL takes the URL string, not the Blob. Revocation is
        // delayed so the download triggered by click() can start first.
        setTimeout(() => URL.revokeObjectURL(objectUrl), 1e4);
    },

    /**
     * Download the web-cache copy of a wiki page given its path.
     *
     * NOTE(review): the file name is escaped with encodeURIComponent, which
     * leaves `*`, `'` and `.` untouched, so names differ from getTitle() for
     * such titles. Left as is to keep already-downloaded files detectable.
     *
     * @param {string} path - page path starting with "/".
     * @returns {Promise<"skip"|object>} "skip" when the file is already
     *     present (native mode only), otherwise the result of download().
     */
    async downloadFromPath(path) {
        const title = path.slice(1);
        const url = `https://wiki.komica.org/${title}`;
        const cache = "http://webcache.googleusercontent.com/search?q=cache:" + url;
        this.cacheUrl = cache;
        const file = title.replace(/[*"'.?/:]/g, c => encodeURIComponent(c)) + ".html";
        if (this.native && await this.fileExist(file)) return "skip";
        return await this.download(cache, this.downloadPath + "/" + file);
    },

    /**
     * Check through a shell command whether a file is already present in
     * the Downloads/<downloadPath> directory.
     *
     * @param {string} file - file name inside the download directory.
     * @returns {Promise<boolean>} true when the command printed anything.
     */
    async fileExist(file) {
        const ex = this.ex;
        const safe = await ex.shellescape(file);
        let exist;
        if (this.os == "windows") {
            exist = await ex.exclaim_quiet(`if exist ..\\Downloads\\${this.downloadPath}\\${safe} echo exist`);
        } else {
            exist = await ex.exclaim_quiet(`test -f $HOME/Downloads/${this.downloadPath}/${safe} && echo -n exist`);
        }
        return Boolean(exist);
    },

    /**
     * Look up several downloads by id in parallel.
     *
     * @param {number[]} list - download ids.
     * @returns {Promise<object[]>} the flattened search results.
     */
    async queryIdList(list) {
        const ql = list.map(id => this.browser.downloads.search({
            id: id
        }));
        return (await Promise.all(ql)).flat();
    },

    /**
     * Run one download method over a list, one item at a time, waiting for
     * each download to finish before starting the next.
     *
     * A download that is neither complete nor in progress is opened in a
     * background tab; a 404 title marks the item as failed, a timeout asks
     * the user whether to retry.
     *
     * @param {"downloadFromPath"|"downloadFromAnchor"} method
     * @param {number} n - index of the first item to process.
     * @throws {Error} "unknown method" for any other method name; errors
     *     from the method other than download errors are re-thrown.
     */
    async dlax(method, n = 0) {
        await this.init();
        let l;
        let backupGoogleSearch = false;
        if (method == "downloadFromPath") {
            l = tri.state.komica_wiki_data;
        } else if (method == "downloadFromAnchor") {
            l = this.list || $all("a>h3").map(h => h.parentNode);
        } else throw new Error("unknown method");
        // `i` is advanced inside the body so that a retry repeats the item.
        for (let i = n; i < l.length && !this.stop;) {
            console.log("line " + i);
            let dl;
            try {
                dl = await this[method](l[i]);
            } catch (error) {
                if (error.codeName != "download-error") throw error;
                // The download never started, so there is nothing to poll;
                // record the failure and move on to the next item.
                console.error(error);
                backupGoogleSearch = true;
                i++;
                continue;
            }
            if (dl == "skip") {
                console.log("skip");
                i++;
                continue;
            }
            if (dl == "no-cache") {
                backupGoogleSearch = true;
                i++;
                continue;
            }
            let retry = false;
            while (true) {
                const state = (await this.browser.downloads.search({
                    id: dl.id
                }))[0];
                // A missing entry is treated like an interrupted download.
                const status = state ? state.state : undefined;
                if (status == "complete") {
                    await this.ex.sleep(3e3);
                    break;
                }
                if (status == "in_progress") {
                    await this.ex.sleep(500);
                    continue;
                }
                await this.sleep(1);
                const tab = await this.tabopen(dl.url);
                // The poller gets the same 3 s budget as the race so it does
                // not keep running after the timeout wins.
                const is404p = this.check404(tab.id, 3);
                const first = await Promise.race([ is404p, this.sleep(3).then(() => "timeout") ]);
                if (first == "error-404") {
                    await this.browser.tabs.remove(tab.id);
                    await this.sleep(2);
                    backupGoogleSearch = true;
                }
                if (first != "timeout") break;
                retry = await this.tabEval(`confirm('retry ${i}? (or skip)')`);
                if (!retry) {
                    try {
                        await this.browser.tabs.remove(tab.id);
                        backupGoogleSearch = true;
                    } catch (error) {
                        console.error(error);
                        console.log("tab is already closed");
                    }
                }
                break;
            }
            if (!retry) i++;
        }
        if (method == "downloadFromAnchor") {
            if (backupGoogleSearch) {
                // Keep the search page itself when some result had no copy.
                this.downloadCurrentHtml();
                await this.sleep(1);
            }
            this.confirmNext();
        }
    },

    /** Ask whether to continue and click the `#pnnext` element if so. */
    confirmNext() {
        if (confirm(`next?`)) $("#pnnext").click();
    },

    /**
     * Download every path in `tri.state.komica_wiki_data` starting at `n`.
     *
     * @param {number} n - index of the first path.
     */
    async dlap(n = 0) {
        return await this.dlax("downloadFromPath", n);
    },

    /**
     * @param {number} s - delay in seconds.
     * @returns {Promise<void>} resolves after the delay.
     */
    sleep(s) {
        return new Promise(wake => setTimeout(wake, s * 1e3));
    },

    /**
     * Poll a tab until its title starts with "Error 404", the tab is gone,
     * or the time budget is used up.
     *
     * @param {number} tabId
     * @param {number} timeout - budget in seconds; the default never expires.
     * @returns {Promise<"error-404"|"close"|"timeout">}
     */
    async check404(tabId, timeout = Infinity) {
        const deadline = Date.now() + timeout * 1e3;
        while (Date.now() < deadline) {
            let tab;
            try {
                tab = await this.browser.tabs.get(tabId);
            } catch (closed) {
                return "close";
            }
            if (tab.title.match(/^Error 404/)) return "error-404";
            await this.ex.sleep(200);
        }
        return "timeout";
    },

    /**
     * Evaluate code in the page: directly when `tri.browserBg` is set,
     * otherwise through `tabs.executeScript` on the tab recorded by init().
     *
     * SECURITY: `code` is passed to eval(); only call this with strings
     * built by this script, never with page- or user-supplied text.
     *
     * @param {string} code
     * @returns {Promise<*>} the value of the evaluated code.
     */
    async tabEval(code) {
        if (tri.browserBg) return eval(code);
        const resultList = await this.browser.tabs.executeScript(this.tabId, {
            code: code
        });
        return resultList[0];
    },

    /**
     * Open a URL in a background tab of the window and cookie store
     * recorded by init().
     *
     * @param {string} url
     * @returns {Promise<object>} the created tab.
     */
    async tabopen(url) {
        return await this.browser.tabs.create({
            active: false,
            url: url,
            windowId: this.windowId,
            cookieStoreId: this.cookieStoreId
        });
    },

    /**
     * Download the cache copies for a list of result anchors.
     *
     * @param {?Array} list - anchors to process; when null dlax() collects
     *     them from the page.
     */
    async dlan(list = null) {
        this.list = list;
        return await this.dlax("downloadFromAnchor");
    },

    /**
     * Download the results of the current search page, then navigate to the
     * search for the next list entry whose file does not exist yet.
     *
     * @param {number} [n] - index of the first anchor to process.
     */
    async dlar(n) {
        // Navigation is handled below, so the "next?" prompt is disabled.
        // This replaces the method on the shared object for good.
        this.confirmNext = (() => 0);
        await this.dlax("downloadFromAnchor", n);
        const u = new URL(location.href);
        const q = u.searchParams.get("q");
        // Drop the first word of the query (the `site:` term built below).
        const word = q.replace(/^[^ ]* /, "");
        const [list] = await $ex("state_get komica_wiki_data");
        const index = list.indexOf("/" + word.replace(/ /g, "_"));
        if (index == -1) return alert(`not-found-in-list ${word}`);
        let wordNext;
        for (let i = index + 1; i < list.length; i++) {
            const path = list[i];
            // getTitle() decodes the path, so literal "%" is protected first.
            const file = this.getTitle({
                pathname: path.replace(/%/g, '%25')
            });
            if (!await this.fileExist(file + ".html")) {
                wordNext = path.slice(1);
                break;
            }
        }
        if (!wordNext) return alert("done");
        wordNext = wordNext.replace(/_/g, " ");
        location.href = `?q=site:wiki.komica.org+${encodeURIComponent(wordNext)}`;
    },

    /**
     * Constructor for a promise with externally reachable `resolve` and
     * `reject` functions. Call with `new`.
     */
    Defer: function Defer() {
        this.promise = new Promise((resolve, reject) => {
            this.resolve = resolve;
            this.reject = reject;
        });
    },

    /**
     * Search Google for a word and extract the web-cache URLs embedded in
     * the result page's scripts. Requires init() (uses `this.domParser`).
     *
     * NOTE(review): `word` is inserted into the query string unencoded, and
     * each returned string still carries the surrounding double quotes of
     * the script literal; both are kept as callers are not visible here.
     *
     * @param {string} word - search terms.
     * @returns {Promise<string[]>} decoded URLs from the first script that
     *     contains any; empty when none does.
     */
    async keywordToCacheUrl(word) {
        const google = "https://www.google.com/search";
        const res = await fetch(`${google}?q=${word}`);
        const html = await res.text();
        const doc = this.domParser.parseFromString(html, "text/html");
        const scriptList = Array.from(doc.querySelectorAll("script"));
        const regexp = /\\x22https?:..webcache.googleusercontent.com.*?\\x22/g;
        // String.match resets lastIndex itself, unlike test() on a /g regex.
        let urlCodeList = null;
        for (const script of scriptList) {
            urlCodeList = script.textContent.match(regexp);
            if (urlCodeList) break;
        }
        if (!urlCodeList) return [];
        // Undo the "\xNN" script escapes first, then the HTML entity that
        // the escapes reveal ("\x26amp;" -> "&amp;" -> "&").
        return urlCodeList.map(code => code
            .replace(/\\x[0-9a-f]{2}/g, c => String.fromCodePoint(parseInt(c.slice(2), 16)))
            .replace(/&amp;/g, "&"));
    },

    /**
     * Store a list of page paths in `tri.state.komica_wiki_data`.
     *
     * @param {string|string[]} [x] - newline separated text or a ready
     *     list; when falsy the clipboard is read instead.
     */
    async readListFromClipboardToState(x) {
        if (!x) x = await this.ex.getclip("clipboard");
        let l;
        if (typeof x == "string") {
            // Accept CRLF text and ignore blank lines such as a trailing one.
            l = x.split(/\r?\n/).filter(line => line !== "");
        } else l = x;
        tri.state.komica_wiki_data = l;
    }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment