Last active
November 12, 2019 23:02
-
-
Save iaincollins/82e1f245271aaa2bf2d100010b0afb1f to your computer and use it in GitHub Desktop.
Script to fetch images from Flickr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Interactive script to obtain Flickr OAuth credentials.
//
// 1. Add your API key and secret below
// 2. Run `npm i flickrapi` to install the SDK dependency
// 3. Run `node auth.js` and follow the instructions
//    (will open browser window, need to paste number back in terminal and copy entire output)
const Flickr = require('flickrapi')

const FLICKR_API_KEY = 'YOUR-API-KEY'
const FLICKR_SECRET = 'YOUR-API-SECRET'

Flickr.authenticate(
  {
    api_key: FLICKR_API_KEY,
    secret: FLICKR_SECRET,
  },
  async (err, flickr) => {
    // Intentionally empty: Flickr.authenticate drives the interactive
    // browser flow and prints the resulting tokens to the terminal itself.
  }
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Script to download every image listed in photos.csv (expected columns:
// photoId, title, url) into ./images/, one at a time, skipping any file
// that has already been downloaded.
const csv = require('csv-parser')
const fs = require('fs')
const results = []
const download = require('image-downloader')
const mkdirp = require('mkdirp')

fs.createReadStream('photos.csv')
  .pipe(csv())
  .on('data', (data) => results.push(data))
  .on('end', async () => {
    // Download sequentially (one await per row) rather than in parallel,
    // to avoid hammering the remote host.
    for (const result of results) {
      await new Promise(resolve => {
        const { photoId, title, url } = result
        // Use hashing on path to file to avoid too many files in one directory
        const baseDir = `./images/${photoId[0]}${photoId[1]}`
        mkdirp.sync(baseDir)
        const dest = `${baseDir}/${photoId}.jpg`
        if (!fs.existsSync(dest)) {
          console.log(`Downloading image ${photoId} (${url.trim()})…`)
          download.image({
            url: url.trim(),
            dest
          })
            .then(({ filename, image }) => {
              console.log('Saved image to', dest)
              resolve(true)
            })
            .catch((err) => {
              // Log and continue. Previously a failed download left this
              // promise unresolved, stalling the whole loop forever.
              console.error(err)
              resolve(true)
            })
        } else {
          console.log(`Skipping image ${photoId} (${url.trim()}) as already downloaded`)
          resolve(true)
        }
      })
    }
  });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Script to fetch list of images from flickr API
 *
 * For Flickr API docs see:
 * https://github.com/Pomax/node-flickrapi
 * https://www.flickr.com/services/api/flickr.photos.search.html
 **/
require('dotenv').config()
const Flickr = require('flickrapi')
const Datastore = require('nedb')

// Credentials expected to be environment variables, but you can hard code them here instead
const FLICKR_API_KEY = process.env.FLICKR_API_KEY
const FLICKR_SECRET = process.env.FLICKR_SECRET

//const FLICKR_API_QUERY = { text: "paulfavs" }
//const FLICKR_API_QUERY = { tags: "paulfavs" }
const FLICKR_API_QUERY = { user_id: 'paul_clarke' }

// Option to manually skip pages below a certain number, for results we know we already have.
//
// This is useful for retrying when building a list of photos as the Flickr API can stall
// after handling a large number of requests. This provides a quick and dirty way to resume
// from a manually specified point.
//
// NB: Can be any point before the last successful request; it doesn't have to be exact —
// duplicates are never recorded because entries are keyed on Photo ID (which is unique).
const SKIP_PAGES_BELOW_THRESHOLD = 0

const db = new Datastore({ filename: 'photos.db', autoload: true })

// Declared with const — this was previously assigned without a declaration,
// creating an implicit global (a crash under strict mode / ES modules).
const flickrOptions = {
  api_key: FLICKR_API_KEY,
  secret: FLICKR_SECRET,
  progress: false
}
// Walk every page of search results, recording each photo in the local
// nedb datastore, then dump the whole datastore as CSV-ish console output.
Flickr.tokenOnly(flickrOptions, async (err, flickr) => {
  const query = FLICKR_API_QUERY
  const numberOfPages = await getNumberOfPhotos(flickr, query)

  if (SKIP_PAGES_BELOW_THRESHOLD > 0)
    console.log(`Skipping pages below page ${SKIP_PAGES_BELOW_THRESHOLD}…`)

  const promises = []
  for (let page = 1; page <= numberOfPages; page++) {
    if (page <= SKIP_PAGES_BELOW_THRESHOLD) continue
    const photosInPage = await getPhotosInPage(flickr, query, page)
    // Plain for...of (not forEach with an async callback, whose returned
    // promises were silently discarded) so each promise is tracked.
    for (const photo of photosInPage) {
      const promise = new Promise(resolve => {
        db.findOne({ photoId: photo.id }, async (err, doc) => {
          const pageDescription = `Page ${page} of ${numberOfPages}`
          if (doc) {
            // Use existing object
            // TODO Placeholder in case want to add additional data
            console.log(`Updating Photo #${doc.photoId} (${pageDescription}) - ${doc.title}`)
            const newDoc = doc
            db.update({ _id: doc._id }, newDoc)
          } else {
            // Add new object
            const newDoc = {
              photoId: photo.id,
              page: pageDescription,
              public: photo.ispublic === 1 ? true : false,
              url: await getLargestPhotoUrl(flickr, photo.id),
              title: photo.title,
              lastUpdated: new Date().toISOString()
            }
            console.log(`Added new Photo #${newDoc.photoId} (${pageDescription}) - ${newDoc.title}`)
            db.insert(newDoc)
          }
          // Resolve only once the lookup (and insert/update dispatch) has
          // actually happened. Previously resolve() ran before the findOne
          // callback fired, so Promise.all below guaranteed nothing.
          resolve(true)
        })
      })
      promises.push(promise)
    }
  }
  await Promise.all(promises)
  console.log("Completed!")

  // Simple hacky console log of entire DB once script is complete, so can easily manually capture output to a CSV
  console.log(`photoId,public,url,title,lastUpdated`)
  db.find({ }, function (err, docs) {
    docs.forEach(doc => console.log(`${doc.photoId},${doc.public},${doc.url},"${doc.title}",${doc.lastUpdated}`))
  });
})
/**
 * Returns the total number of result pages for a Flickr photo search.
 *
 * @param {object} flickr - Authenticated flickrapi client.
 * @param {object} query - Query object for flickr.photos.search.
 * @returns {Promise<number>} Number of pages in the result set.
 */
const getNumberOfPhotos = async (flickr, query) => {
  return new Promise((resolve, reject) => {
    flickr.photos.search(query, (err, result) => {
      // Reject instead of throw: a throw inside this async callback cannot
      // be caught by the awaiting caller and leaves the promise pending.
      if (err) return reject(new Error(err))
      resolve(result.photos.pages)
    })
  })
}
/**
 * Returns the array of photo objects on one page of a Flickr search.
 *
 * @param {object} flickr - Authenticated flickrapi client.
 * @param {object} query - Base query object for flickr.photos.search.
 * @param {number} page - 1-indexed page number to fetch.
 * @returns {Promise<Array<object>>} Photos on the requested page.
 */
const getPhotosInPage = async (flickr, query, page) => {
  return new Promise((resolve, reject) => {
    flickr.photos.search({
      ...query,
      page,
    }, (err, result) => {
      // Reject instead of throw — see getNumberOfPhotos for rationale.
      if (err) return reject(new Error(err))
      resolve(result.photos.photo)
    })
  })
}
/**
 * Returns the URL of the largest available size of a photo.
 *
 * @param {object} flickr - Authenticated flickrapi client.
 * @param {string} photo_id - Flickr photo ID.
 * @returns {Promise<string>} Source URL of the largest size.
 */
const getLargestPhotoUrl = async (flickr, photo_id) => {
  return new Promise((resolve, reject) => {
    flickr.photos.getSizes({ photo_id }, (err, result) => {
      // Reject instead of throw — see getNumberOfPhotos for rationale.
      if (err) return reject(new Error(err))
      // Largest photo they have is always the last one in the list
      // NB: Usually, but not always, it has the label 'Original'.
      const largestPhotoUrl = result.sizes.size[result.sizes.size.length - 1].source
      resolve(largestPhotoUrl)
    })
  })
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment