FiftyThree Mix (AKA Paper Public Stream) archiver/scraper script
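For context before the code below: the script pages through the Paper API's creations endpoint and relies on each response having an `items` array and a `nextURL` field. The sketch here shows the rough response shape the script assumes; field names are taken from how scrape.js reads the response, and the concrete id, dimensions, and URLs are placeholders rather than real API output.

// Rough shape of one page of GET /users/<USER_ID>/creations?count=100, as assumed
// by scrape.js below. Field names come from how the script reads the response;
// all concrete values here are placeholders, not real API output.
const examplePage = {
    items: [
        {
            id: 'example-creation-id',   // used as the JSON and image filename
            imageType: 'jpeg',           // 'png' or 'jpeg'
            imageWidth: 1024,            // placeholder dimensions
            imageHeight: 768,
            imageURLs: {
                // keyed by '<width>x<height>/<type>', matching how scrape.js builds the lookup key
                '1024x768/jpeg': 'https://example.com/placeholder.jpg',
            },
        },
    ],
    // Relative URL of the next page; falsy/absent on the last page, which ends the paging loop.
    nextURL: '/users/example-user/creations?count=100&<paging-params>',
};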
package.json:
{
    "name": "mix-archiver",
    "version": "1.0.0",
    "description": "Scrape and archive a FiftyThree Mix (Paper Public Stream) profile.",
    "bin": "scrape.js",
    "author": "Aseem Kishore <[email protected]>",
    "license": "MIT",
    "dependencies": {
        "superagent": "^3.6.0"
    },
    "devDependencies": {}
}
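Assuming both files are saved to the same directory, running `npm install` there (to pull in superagent, which scrape.js requires) followed by `node scrape.js` should be all that's needed; see also the usage comment at the top of the script.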
scrape.js:
#!/usr/bin/env node
//
// Usage: simply run `node scrape.js` in the directory where you want JSON and images downloaded.
// They'll be downloaded to `json` and `images` subdirectories, respectively.

const echo = console.log;

const FS = require('fs');
const Path = require('path');
const superagent = require('superagent');

const API_ROOT = 'https://paper-api.fiftythree.com';
const USER_ID = 'aseemk'; // <-- FILL IN YOUR MIX USER ID HERE (can find by going to https://paper.fiftythree.com/me)

// These file paths are relative to the current working directory:
const JSON_DIR = 'json';
const IMAGES_DIR = 'images';

const ensureDir = async (path) => {
    try {
        FS.mkdirSync(path);
    } catch (err) {
        if (err.code !== 'EEXIST') {
            throw err;
        }
    }
};

const setup = async () => {
    echo(`Creating directories...`);
    await ensureDir(JSON_DIR);
    await ensureDir(IMAGES_DIR);
};

const saveJSON = async (filename, data, verbose) => {
    const file = Path.join(JSON_DIR, filename);
    const json = JSON.stringify(data, null, 4);
    if (verbose) echo(`Saving ${file}...`);
    FS.writeFileSync(file, json, 'utf8');
};
const downloadImage = async (filename, url) => {
    const file = Path.join(IMAGES_DIR, filename);
    // `pipe()` returns the destination stream rather than a promise, so wrap it
    // to resolve only once the file has been fully written (or reject on a write error).
    await new Promise((resolve, reject) => {
        superagent.get(url).pipe(FS.createWriteStream(file))
            .on('finish', resolve)
            .on('error', reject);
    });
};
const getExtension = (imageType) => {
    switch (imageType) {
        case 'png': return '.png';
        case 'jpeg': return '.jpg';
        default: return '';
    }
};
const scrape = async () => {
    let items = [];
    let nextURL = `/users/${USER_ID}/creations?count=100`;
    let page = 1;

    // Page through the user's creations until the API stops returning a `nextURL`.
    while (nextURL) {
        echo(`Fetching JSON page ${page}...`);
        const {body} = await superagent.get(API_ROOT + nextURL);
        items.push(...body.items);
        nextURL = body.nextURL;
        page += 1;
    }

    await saveJSON(`${USER_ID}-creations.json`, {
        type: 'list',
        items,
    }, true);

    echo(`Saving individual creation JSONs...`);
    await Promise.all(items.map(async (item) => {
        return await saveJSON(`${item.id}.json`, item);
    }));

    echo(`Downloading individual creation images...`);
    await Promise.all(items.map(async (item) => {
        if (!item.imageURLs) {
            return;
        }
        // Image URLs are keyed by `<width>x<height>/<type>`, e.g. `1024x768/jpeg`.
        const ext = getExtension(item.imageType);
        const key = `${item.imageWidth}x${item.imageHeight}/${item.imageType}`;
        const url = item.imageURLs[key];
        return await downloadImage(`${item.id}${ext}`, url);
    }));
};
const main = async () => {
    await setup();
    await scrape();
};

main()
    .then(() => echo('Done!'))
    .catch((err) => echo(err));
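Based on the paths used above, a successful run should leave a `json/` directory containing `<USER_ID>-creations.json` (the combined list) plus one `<id>.json` file per creation, and an `images/` directory containing one `<id>.png` or `<id>.jpg` file for each creation that exposes `imageURLs`.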