Last active
December 18, 2017 21:36
-
-
Save matthewpizza/3bf562a6257eb1cbea625f87062fb699 to your computer and use it in GitHub Desktop.
Trying to back up my Instagram media, because the limits of their sandboxed API only let you fetch the 20 most recent items
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Step 1: Scroll down until all items have loaded.
// Step 2: Paste the following into the console.
// TODO: Probably the classNames and document structure change ¯\_(ツ)_/¯
const items = document.querySelectorAll('._mck9w._gvoze._f2mse');
const media = [];

/**
 * Walks down from `node`, following one `childNodes` index per entry in `path`.
 * Returns undefined as soon as a step is missing instead of throwing, so a
 * tile with an unexpected structure cannot abort the whole scrape.
 *
 * @param {Node} node - Element to start from.
 * @param {number[]} path - Child index to follow at each level.
 * @returns {Node|undefined} The node at the end of the path, if it exists.
 */
function descend(node, path) {
  let current = node;
  for (let step = 0; step < path.length && current; step++) {
    current = current.childNodes ? current.childNodes[path[step]] : undefined;
  }
  return current;
}

for (let i = 0, len = items.length; i < len; i++) {
  const item = items[i];
  // First child is read for the post href; the node three levels below it for the media src.
  const link = descend(item, [0]);
  const thumbnail = descend(item, [0, 0, 0, 0]);
  if (!link || !thumbnail) {
    console.warn(`Skipping item ${i}: unexpected document structure`, item);
    continue;
  }
  // Items whose label node reads "Video" are flagged; a missing label simply means "not a video".
  const label = descend(item, [0, 1, 0, 0]);
  media.push({
    href: link.href,
    src: thumbnail.src,
    is_video: Boolean(label) && 'Video' === label.innerText,
  });
}
// Step 3: Right click the logged output and “Save as…”
console.log(JSON.stringify(media, null, 2));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Run this with node | |
const fs = require('fs');
const https = require('https');
const { exec, execFile } = require('child_process');
// Scraped entries saved from the browser console (whatever you called that file).
const items = require('./data.json');

// Downloads are collected in a "media" folder next to this script;
// create it on the first run.
const dir = `${__dirname}/media`;
if (fs.existsSync(dir) === false) {
  fs.mkdirSync(dir);
}
/**
 * Fetches the post page at `item.href`, reads its `og:video` meta tag and
 * hands the URL it names to `downloadMedia`.
 *
 * A non-200 status, a request error, or a page without an `og:video` tag is
 * logged and the item is skipped, so one bad post cannot abort the run.
 *
 * @param {{href: string}} item - Scraped entry; only `href` is read here.
 */
function getVideo(item) {
  const request = https.get(item.href, (res) => {
    const { statusCode } = res;
    if (200 !== statusCode) {
      console.log(`:( ${statusCode} for ${item.href}`);
      // Discard the unread body; nothing else will consume it.
      res.resume();
      return;
    }
    res.setEncoding('utf8');
    let data = '';
    res.on('data', (chunk) => { data += chunk; });
    res.on('end', () => {
      // [^"]* (not .*) so the capture stops at the attribute's closing quote
      // even when more tags follow on the same line.
      const found = data.match(/<meta\s+property="og:video"\s+content="([^"]*)"\s*\/?>/i);
      if (!found) {
        console.log(`:( no og:video meta tag in ${item.href}`);
        return;
      }
      downloadMedia(found[1]);
    });
  });
  // Without a listener, a failed request emits an unhandled 'error' event.
  request.on('error', (err) => {
    console.log(`:( ${err.message} for ${item.href}`);
  });
}
/**
 * Returns the part of a path or URL that follows its last slash.
 * Backslashes are treated as slashes, so Windows-style paths work too.
 *
 * @param {string} path - File path or URL.
 * @returns {string} The trailing segment (empty when `path` ends in a slash).
 */
function basename(path) {
  const forwardSlashed = path.replace(/\\/g, '/');
  const trailingSegment = forwardSlashed.replace(/.*\//, '');
  return trailingSegment;
}
/**
 * Downloads `url` into the media directory with wget, unless a file with the
 * same basename is already there.
 *
 * wget is started without a shell, so characters such as `&`, `;` or spaces
 * in the URL or the directory name reach it verbatim instead of being
 * interpreted as shell syntax. Failures are logged rather than ignored.
 *
 * @param {string} url - Direct URL of the image or video to fetch.
 */
function downloadMedia(url) {
  if (typeof url !== 'string' || url === '') {
    console.log(`:( nothing to download (got ${url})`);
    return;
  }
  const target = `${dir}/${basename(url)}`;
  if (fs.existsSync(target)) {
    console.log(`${target} exists`);
    return;
  }
  // I’m lazy
  console.log(`Fetching ${url}`);
  // "--" ends option parsing so a URL starting with "-" is never read as a flag.
  execFile('wget', ['--', url], { cwd: dir }, (err) => {
    if (err) {
      console.log(`:( wget failed for ${url}: ${err.message}`);
    }
  });
}
// Route every scraped entry: videos need their post page resolved to a real
// video URL first, everything else is fetched straight from its `src`.
for (let index = 0; index < items.length; index += 1) {
  const entry = items[index];
  if (entry.is_video) {
    getVideo(entry);
  } else {
    downloadMedia(entry.src);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment