Skip to content

Instantly share code, notes, and snippets.

@gotomypc
Forked from mrjjwright/gist:3240020
Created October 28, 2012 08:11
Show Gist options
  • Save gotomypc/3968033 to your computer and use it in GitHub Desktop.
Save gotomypc/3968033 to your computer and use it in GitHub Desktop.
Extract the largest image thumbnail from a URL using Node and cheerio
cheerio = require('cheerio')
Shred = require('shred')
shred = new Shred()
http = require('http')
URL = require('url')
# HTTP entry point. A request carrying a `url` query parameter is answered
# with the JSON-encoded image URL discovered for that page, with 404 when no
# image is found, and with 400 when the parameter is missing or malformed.
#
# NOTE(review): the target URL comes straight from the caller and is fetched
# as-is; consider restricting schemes/hosts before exposing this publicly.
server = http.createServer (request, response) ->
  url = URL.parse(request.url, true)
  urlToDiscover = url.query['url']

  # A repeated `url` parameter parses to an array and an absent one to
  # undefined; neither is a fetchable target, so reject it up front instead
  # of handing it to the fetcher.
  unless typeof urlToDiscover is 'string' and urlToDiscover
    response.writeHead(400, "Content-Type": 'text/plain')
    response.end("Missing or invalid 'url' query parameter")
    return

  startDiscovery urlToDiscover, (theImageURL) ->
    if theImageURL?
      response.writeHead(200, "Content-Type": 'application/json')
      response.end(JSON.stringify(theImageURL))
    else
      console.log("Did not find image for #{urlToDiscover}")
      # The second argument is the HTTP reason phrase, not a body.
      response.writeHead(404, "Cant find theImage")
      response.end()

# Port comes from the environment when provided, otherwise 5000.
server.listen(process.env.PORT || 5000, '0.0.0.0')
# Fetches `url` and reports the image discovered for it.
#
# url - address of the page to inspect.
# cb  - receives the image URL, or a null/undefined value when nothing was
#       found or the fetch did not produce a usable page. Invoked at most
#       once per call.
startDiscovery = (url, cb) ->
  finished = false

  # A single fetch can trigger more than one handler (a redirect followed by
  # the final response). The caller writes an HTTP response from `cb`, which
  # must not happen twice, so only the first result is forwarded.
  done = (imageURL) ->
    return if finished
    finished = true
    cb(imageURL)

  shred.get
    url: url
    on:
      redirect: (response) ->
        # No page body is available here, so only URL-based discovery can
        # succeed. Report a hit; on a miss stay silent so the handler for the
        # final response can still answer.
        # NOTE(review): assumes Shred follows the redirect after emitting
        # this event — confirm against the installed Shred version.
        discoverImage response.request.url, null, (imageURL) ->
          done(imageURL) if imageURL
      200: (response) ->
        discoverImage(response.request.url, response.content.data, done)
      # Catch-all for statuses without a dedicated handler, so the caller is
      # always answered instead of waiting forever.
      # NOTE(review): `response` and `request_error` are Shred event names as
      # documented in its README — confirm for the installed version.
      response: (response) ->
        done(null)
      request_error: (error) ->
        done(null)
# Finds the image for a page and passes it to `cb`.
#
# url  - page URL; tried first through URL-based discovery.
# data - page markup, or a falsy value when no body is available; only
#        consulted when URL-based discovery found nothing.
# cb   - receives the cleaned image URL, or null when nothing was discovered.
discoverImage = (url, data, cb) ->
  candidate = discoverImageFromURL(url)
  candidate = discoverImageFromData(url, data) if data and not candidate
  cb(cleanURL(url, candidate))
# Turns a discovered image reference into an absolute URL.
#
# baseURL       - URL of the page the reference was found on.
# discoveredURL - absolute, protocol-relative, root-relative or relative
#                 reference; may be empty/undefined.
#
# Returns the absolute URL, or null when nothing was discovered.
cleanURL = (baseURL, discoveredURL) ->
  return null unless discoveredURL
  # A reference that already carries a scheme (http:, https:, data:, ...) is
  # handed back untouched.
  return discoveredURL if /^[a-z][a-z0-9+.\-]*:/i.test(discoveredURL)
  # Resolve against the page URL. Plain concatenation would append the
  # reference to the page's own path ("http://a.com/p/x.html/img.png") and
  # cannot handle "//host/..." or "img.png" style references at all.
  URL.resolve(baseURL, discoveredURL)
# URL-only discovery: derives an image address from the page URL itself,
# without fetching or parsing the page.
#
# url - page URL; non-string values are treated as "nothing found".
#
# Returns the URL with ":medium" appended when it contains "yfrog"
# (presumably yfrog's medium-size rendition — TODO confirm), otherwise
# undefined.
discoverImageFromURL = (url) ->
  return unless typeof url is 'string'
  return "#{url}:medium" if url.indexOf('yfrog') != -1
  undefined
# Picks the <img> with the largest declared area (width * height attributes)
# out of a page's markup.
#
# url  - page URL (currently unused; kept for interface compatibility).
# data - page markup to parse.
#
# Returns the `src` of the largest image, or undefined when no image with a
# `src` declares usable positive dimensions.
discoverImageFromData = (url, data) ->
  $ = cheerio.load(data)

  # Reads a dimension attribute as a number. Accepts "120" as well as
  # "120px"; anything else (missing, "50%", "auto") yields NaN so the image
  # is excluded from the comparison below.
  pixels = (value) ->
    match = /^\s*(\d+(?:\.\d+)?)\s*(?:px)?\s*$/i.exec(value ? '')
    if match then parseFloat(match[1]) else NaN

  largestSrc = undefined
  largestArea = 0
  for image in $('img')
    element = $(image)
    src = element.attr('src')
    # An image without a src cannot be returned, so it must not win.
    continue unless src
    area = pixels(element.attr('width')) * pixels(element.attr('height'))
    # NaN never compares greater, so images lacking dimensions are skipped.
    if area > largestArea
      largestArea = area
      largestSrc = src
  largestSrc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment