Last active
April 13, 2025 15:36
-
-
Save refparo/495a7d460431690cb094fa5ffbb3adf1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from "fs/promises"; | |
/**
 * Extracts the author segment from a tweet URL.
 * Assumes URLs shaped like `https://<host>/<author>/...`, so the author is
 * the fourth "/"-separated segment.
 * @param {string} source tweet URL stored in the metadata
 * @returns {string | undefined} the author, or undefined for a shorter URL
 */
const authorOf = (source) => source.split("/").at(3);

const file = await fs.readFile("metadata.json", { encoding: "utf-8" });
/** @type {{ [key: string]: { source: string } }} */
const metadata = JSON.parse(file);

// Count how many recorded images belong to each author.
/** @type {{ [key: string]: number }} */
const authors = {};
for (const { source } of Object.values(metadata)) {
  const author = authorOf(source);
  authors[author] = (authors[author] ?? 0) + 1;
}

try {
  // Iterate over a snapshot of the keys: entries are added and removed below.
  for (const key of Object.keys(metadata)) {
    // Keys containing "/" are already filed under an author directory.
    if (key.includes("/")) continue;
    const data = metadata[key];
    const author = authorOf(data.source);
    if (author == null) continue;
    const nestedKey = `${author}/${key}`;
    if (nestedKey in metadata) {
      // The same image is already recorded under the author directory, so the
      // top-level copy is a duplicate. Touch the disk first so the metadata
      // only changes once the file operation has succeeded.
      await fs.unlink(key);
      delete metadata[key];
      continue;
    }
    if (authors[author] > 1) {
      // Authors with more than one image get their own directory.
      await fs.mkdir(author, { recursive: true });
      await fs.rename(key, nestedKey);
      delete metadata[key];
      metadata[nestedKey] = data;
    }
  }
} finally {
  // Persist whatever was completed even if a file operation failed midway, so
  // metadata.json keeps matching the files on disk.
  await fs.writeFile("metadata.json", JSON.stringify(metadata, undefined, 2), {
    encoding: "utf-8",
  });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Saves a blob as a file by clicking a temporary, hidden download link.
 * @param {string} filename name suggested for the downloaded file
 * @param {Blob} blob file contents
 */
function saveFile(filename, blob) {
  // Delay before releasing the object URL; revoking right after click() can
  // cancel the download in some browsers.
  const REVOKE_DELAY_MS = 60_000;
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.hidden = true;
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    a.remove();
    // Without this the blob stays referenced until the page is unloaded.
    setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
  }
}
/**
 * Downloads every image from the tweets on the current page. Setting the
 * global `window.stopDownload` to a truthy value aborts the run; the flag is
 * reset to `false` when the function finishes.
 *
 * Errors raised while downloading are logged rather than rethrown, and the
 * returned `tweet` is the cell at which processing stopped, so a run can be
 * resumed by passing the result's `metadata` and `tweet` back in as options.
 *
 * @typedef {{ [key: string]: { source: string } }} Metadata
 *
 * @typedef {object} DownloadResult
 * @property {Metadata} metadata
 * @property {Element} tweet
 *
 * @param {object} [options]
 * @param {number} [options.limit] maximum number of tweets to process, unlimited by default
 * @param {Metadata} [options.metadata] initial metadata; images already listed in it are skipped (default: empty). The object is updated in place.
 * @param {boolean} [options.saveMetadata] whether to save the metadata when done, true by default
 * @param {Element} [options.tweet] tweet to start from, defaults to the first one on the page
 * @param {number} [options.interval] pause before processing each tweet in milliseconds, 500 by default
 * @return {Promise<DownloadResult>}
 */
async function downloadImages(options = {}) {
  // Destructuring defaults (unlike an object spread) also apply when an
  // option is passed explicitly as `undefined`.
  const {
    limit = Number.POSITIVE_INFINITY,
    metadata = {},
    saveMetadata = true,
    interval = 500,
  } = options;
  let { tweet = document.querySelector('[data-testid="cellInnerDiv"]') } =
    options;
  try {
    for (
      let count = 0;
      !window.stopDownload && count < limit && tweet != null;
      // nextElementSibling never yields text or comment nodes.
      tweet = tweet.nextElementSibling
    ) {
      // Scroll and wait even for non-tweet cells so the following cells get a
      // chance to be rendered.
      tweet.scrollIntoView();
      await new Promise((resolve) => setTimeout(resolve, interval));
      // The tweet permalink is the last link inside the user-name element.
      const nameLinks =
        tweet
          .querySelector('[data-testid="User-Name"]')
          ?.querySelectorAll('[role="link"]') ?? [];
      const source = [...nameLinks].at(-1)?.href;
      // Cells without a permalink are not tweets; skip them without counting
      // them towards the limit.
      if (source == null) continue;
      count++;
      const author = source.split("/").at(3);
      // Collect the image of each photo container.
      for (const photo of tweet.querySelectorAll(
        '[data-testid="tweetPhoto"]',
      )) {
        const img = photo.querySelector("img");
        if (img == null) continue;
        // Request the original-size variant.
        const src = img.src.replace(/name=.+/, "name=orig");
        // ".../ID?format=EXT&name=orig" becomes "ID.EXT".
        const filename = src
          .split("/")
          .pop()
          .split("&")[0]
          .replace("?format=", ".");
        if (filename in metadata || `${author}/${filename}` in metadata) {
          console.log("已跳过", filename);
          continue;
        }
        console.log("正在下载", src);
        const res = await fetch(src);
        // Never store an HTTP error body as if it were the image.
        if (!res.ok) {
          throw new Error(`HTTP ${res.status} while fetching ${src}`);
        }
        saveFile(filename, await res.blob());
        metadata[filename] = { source };
      }
    }
  } catch (e) {
    console.log("下载中途发生异常:", e);
  }
  // The catch above handles every error, so this always runs.
  if (saveMetadata) {
    console.log("正在保存元数据");
    const blob = new Blob([JSON.stringify(metadata, undefined, 2)], {
      type: "application/json",
    });
    saveFile("metadata.json", blob);
  }
  console.log("下载完成");
  window.stopDownload = false;
  return { metadata, tweet };
}
// Expose the outcome as an explicit global so `result.metadata` and
// `result.tweet` can be passed to a later downloadImages() call.
globalThis.result = await downloadImages();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Reads one file chosen by the user. The next click anywhere on the page
 * opens the file picker.
 * @returns {Promise<File>} the selected file
 * @throws {Error} if the picker is dismissed without selecting a file
 */
async function loadFile() {
  const input = document.createElement("input");
  input.type = "file";
  input.hidden = true;
  document.body.appendChild(input);
  // showPicker() must be triggered by a user gesture, hence the click handler.
  const openPicker = () => {
    input.showPicker();
  };
  try {
    /** @type {Promise<File>} */
    const promisedFile = new Promise((resolve, reject) => {
      input.addEventListener(
        "change",
        () => {
          const file = input.files[0];
          console.log("已选择文件", file);
          resolve(file);
        },
        { once: true },
      );
      // Without this, dismissing the picker would leave the promise pending
      // forever and the hidden input in the document.
      input.addEventListener(
        "cancel",
        () => reject(new Error("File selection was cancelled")),
        { once: true },
      );
    });
    document.body.addEventListener("click", openPicker, { once: true });
    console.log("点击任意位置上传文件");
    return await promisedFile;
  } finally {
    document.body.removeEventListener("click", openPicker);
    input.remove();
  }
}
/**
 * Reads the metadata dictionary from a JSON file chosen by the user.
 * @returns {Promise<{ [key: string]: { source: string } }>} the parsed metadata
 * @throws {SyntaxError} if the file is not valid JSON
 * @throws {TypeError} if the JSON value is not an object
 */
async function loadMetadata() {
  const file = await loadFile();
  const parsed = JSON.parse(await file.text());
  // The metadata is used as a dictionary (`key in metadata`), which throws or
  // misbehaves for null, arrays and primitives. Reject those here instead.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new TypeError("Metadata file must contain a JSON object");
  }
  return parsed;
}
// Expose the loaded dictionary as an explicit global so it can be passed on,
// e.g. as `downloadImages({ metadata })`.
globalThis.metadata = await loadMetadata();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment