Scripts to fetch a list of images from the Flickr API and download them
// 1. Add your API key and secret below
// 2. Run `npm i flickrapi` to install the SDK dependency
// 3. Run `node auth.js` and follow the instructions
//    (this will open a browser window; paste the number back into the terminal, then copy the entire output)
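// Note: the other scripts in this gist pull in further packages; if they are run from the
// same folder, something like this should cover all of the require() calls used below:
//   npm i flickrapi csv-parser image-downloader mkdirp nedb dotenv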
const Flickr = require('flickrapi')
const FLICKR_API_KEY = 'YOUR-API-KEY'
const FLICKR_SECRET = 'YOUR-API-SECRET'
Flickr.authenticate({
  api_key: FLICKR_API_KEY,
  secret: FLICKR_SECRET,
}, async (err, flickr) => {
  // Intentionally empty: calling authenticate() runs the interactive auth flow
  // described in the steps above; copy the output it prints once it completes
})
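// A sketch of a .env file for the listing script further down, which reads its credentials
// via dotenv (variable names are taken from that script; the values are placeholders):
//   FLICKR_API_KEY=your-api-key
//   FLICKR_SECRET=your-api-secret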
const csv = require('csv-parser')
const fs = require('fs')
const download = require('image-downloader')
const mkdirp = require('mkdirp')

const results = []

fs.createReadStream('photos.csv')
  .pipe(csv())
  .on('data', (data) => results.push(data))
  .on('end', async () => {
    for (const result of results) {
      await new Promise(resolve => {
        const { photoId, title, url } = result
        // Shard images into subdirectories named after the first two characters of the
        // photo ID, to avoid too many files ending up in a single directory
        const baseDir = `./images/${photoId[0]}${photoId[1]}`
        mkdirp.sync(baseDir)
        const dest = `${baseDir}/${photoId}.jpg`
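        // e.g. with this scheme a photoId of "49012345678" (an illustrative value)
        // is saved to ./images/49/49012345678.jpg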
        if (!fs.existsSync(dest)) {
          console.log(`Downloading image ${photoId} (${url.trim()})…`)
          download.image({
            url: url.trim(),
            dest
          })
            .then(({ filename, image }) => {
              console.log('Saved image to', dest)
              resolve(true)
            })
            .catch((err) => {
              // Resolve even on failure so one bad URL doesn't stall the whole loop
              console.error(err)
              resolve(false)
            })
        } else {
          console.log(`Skipping image ${photoId} (${url.trim()}) as already downloaded`)
          resolve(true)
        }
      })
    }
  })
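// Usage sketch for this download script (the filename is a placeholder, not part of the gist):
//   node download-images.js
// It expects a photos.csv in the working directory with at least photoId and url columns;
// the CSV block printed at the end of the listing script below has that shape.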
/**
 * Script to fetch a list of images from the Flickr API
 *
 * For Flickr API docs see:
 * https://github.com/Pomax/node-flickrapi
 * https://www.flickr.com/services/api/flickr.photos.search.html
 **/
require('dotenv').config()
const Flickr = require('flickrapi')
const Datastore = require('nedb')
// Credentials are expected to be set as environment variables, but you can hard-code them here instead
const FLICKR_API_KEY = process.env.FLICKR_API_KEY
const FLICKR_SECRET = process.env.FLICKR_SECRET
//const FLICKR_API_QUERY = { text: "paulfavs" }
//const FLICKR_API_QUERY = { tags: "paulfavs" }
const FLICKR_API_QUERY = { user_id: 'paul_clarke' }
// Option to manually skip pages below a certain number, for results we know we already have.
//
// This is useful for retrying when building a list of photos as the Flickr API can stall
// after handling a large number of requests. This provides a quick and dirty way to resume
// from a manually specified point.
//
// NB: This can be any point before the last successful request; it doesn't have to be exact, as
// duplicates are never recorded because entries are keyed on photo ID (which is unique).
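// e.g. with SKIP_PAGES_BELOW_THRESHOLD = 40 (an illustrative value) pages 1 to 40 are skipped
// and fetching resumes from page 41.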
const SKIP_PAGES_BELOW_THRESHOLD = 0
const db = new Datastore({ filename: 'photos.db', autoload: true })
const flickrOptions = {
  api_key: FLICKR_API_KEY,
  secret: FLICKR_SECRET,
  progress: false
}
Flickr.tokenOnly(flickrOptions, async (err, flickr) => {
  const query = FLICKR_API_QUERY
  const numberOfPages = await getNumberOfPhotos(flickr, query)

  if (SKIP_PAGES_BELOW_THRESHOLD > 0)
    console.log(`Skipping pages below page ${SKIP_PAGES_BELOW_THRESHOLD}…`)

  const promises = []
  for (let page = 1; page <= numberOfPages; page++) {
    if (page > SKIP_PAGES_BELOW_THRESHOLD) {
      const photosInPage = await getPhotosInPage(flickr, query, page)
      photosInPage.forEach((photo) => {
        const promise = new Promise(resolve => {
          db.findOne({ photoId: photo.id }, async (err, doc) => {
            const pageDescription = `Page ${page} of ${numberOfPages}`
            if (doc) {
              // Use existing object
              // TODO Placeholder in case we want to add additional data
              console.log(`Updating Photo #${doc.photoId} (${pageDescription}) - ${doc.title}`)
              const newDoc = doc
              db.update({ _id: doc._id }, newDoc)
            } else {
              // Add new object
              const newDoc = {
                photoId: photo.id,
                page: pageDescription,
                public: photo.ispublic === 1,
                url: await getLargestPhotoUrl(flickr, photo.id),
                title: photo.title,
                lastUpdated: new Date().toISOString()
              }
              console.log(`Added new Photo #${newDoc.photoId} (${pageDescription}) - ${newDoc.title}`)
              db.insert(newDoc)
            }
            // Resolve only after the database callback has run, so that Promise.all
            // below genuinely waits for every record to be written
            resolve(true)
          })
        })
        promises.push(promise)
      })
    }
  }
  await Promise.all(promises)
  console.log("Completed!")

  // Simple hacky console log of the entire DB once the script is complete, so the output can easily be captured manually to a CSV
  console.log(`photoId,public,url,title,lastUpdated`)
  db.find({ }, function (err, docs) {
    docs.forEach(doc => console.log(`${doc.photoId},${doc.public},${doc.url},"${doc.title}",${doc.lastUpdated}`))
  });
})
const getNumberOfPhotos = async (flickr, query) => {
  return new Promise((resolve, reject) => {
    flickr.photos.search(query, (err, result) => {
      if (err) { return reject(new Error(err)) }
      resolve(result.photos.pages)
    })
  })
}

const getPhotosInPage = async (flickr, query, page) => {
  return new Promise((resolve, reject) => {
    flickr.photos.search({
      ...query,
      page,
    }, (err, result) => {
      if (err) { return reject(new Error(err)) }
      resolve(result.photos.photo)
    })
  })
}

const getLargestPhotoUrl = async (flickr, photo_id) => {
  return new Promise((resolve, reject) => {
    flickr.photos.getSizes({ photo_id }, (err, result) => {
      if (err) { return reject(new Error(err)) }
      // The largest photo available is always the last one in the list
      // NB: Usually, but not always, it has the label 'Original'.
      const largestPhotoUrl = result.sizes.size[result.sizes.size.length - 1].source
      resolve(largestPhotoUrl)
    })
  })
}