Created
May 1, 2017 03:28
-
-
Save syon/2d1c451790c32edc40e0f916d7c06de5 to your computer and use it in GitHub Desktop.
OGP with cheerio on Node.js (Sample)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fetch = require('node-fetch'); | |
const cheerio = require('cheerio'); | |
// Sample article URLs whose og:* meta properties are fetched and printed.
// Declared with const so the script does not create an implicit global.
const urls = [
  'http://qiita.com/horike37/items/b295a91908fcfd4033a2',
  'http://qiita.com/ryo0301/items/7f9fd8024987526cbc30',
  'http://inokara.hateblo.jp/entry/2017/04/30/092304',
  'https://togetter.com/li/1105672',
  'https://allabout.co.jp/gm/gc/469010/',
  'http://anond.hatelabo.jp/20170429110724',
  'http://internet.watch.impress.co.jp/docs/news/1055983.html',
  'http://qiita.com/chanibarin/items/48d5f5946b1e6c97e379',
  'http://gigazine.net/news/20170410-pix-dt350n/',
  'http://www.coconoodollblog.net/entry/2017/04/17/%E3%80%8C%E3%81%A7%E3%81%8D%E3%81%9F%EF%BC%81_%E3%83%AD%E3%83%95%E3%83%88%E3%82%92%E4%BD%9C%E3%82%8A%E3%81%BE%E3%81%97%E3%81%9F%E3%80%82%E2%91%A1%E3%80%8D%E6%97%A5%E6%9B%9C%E5%A4%A7%E5%B7%A5%E5%A5%B3',
  'http://www.dtmstation.com/archives/51994323.html',
  'http://www.itmedia.co.jp/news/articles/1704/07/news058.html',
]

// All lookups are started without waiting for each other, so results are
// printed in completion order rather than list order.
urls.forEach((url) => {
  getPageInfo(url)
    .then((data) => {
      console.log('=============================')
      console.log(data)
    })
    .catch((err) => {
      // Report the failing URL instead of leaving the rejection unhandled;
      // the remaining lookups keep running.
      console.error(`Failed to get page info for ${url}:`, err)
    })
})
/**
 * Loads the meta properties of a page and condenses them into a summary.
 *
 * @param {string} url - Address forwarded to getMetaProps.
 * @returns {Promise<{site_name: *, title: *, description: *, image: *}>}
 *   The values produced by the four resolve* helpers.
 */
async function getPageInfo(url) {
  const props = await getMetaProps(url)
  return {
    site_name: resolveSiteName(props),
    title: resolveTitle(props),
    description: resolveDesc(props),
    image: resolveImageUrl(props),
  }
}
/**
 * Picks the site name from the `og:site_name` meta property.
 *
 * @param {Array<Object>} metaProps - Meta entries passed through to getMetaPropContent.
 * @returns {*} The og:site_name content, or '(No SiteName)' when it is missing or falsy.
 */
function resolveSiteName(metaProps) {
  return getMetaPropContent(metaProps, 'og:site_name') || '(No SiteName)'
}
/**
 * Picks the page title from the `og:title` meta property.
 *
 * @param {Array<Object>} metaProps - Meta entries passed through to getMetaPropContent.
 * @returns {*} The og:title content, or '(No Title)' when it is missing or falsy.
 */
function resolveTitle(metaProps) {
  const found = getMetaPropContent(metaProps, 'og:title')
  return found ? found : '(No Title)'
}
/**
 * Picks the page description from the `og:description` meta property.
 *
 * @param {Array<Object>} metaProps - Meta entries passed through to getMetaPropContent.
 * @returns {*} The og:description content, or '' when it is missing or falsy.
 */
function resolveDesc(metaProps) {
  return getMetaPropContent(metaProps, 'og:description') || ''
}
/**
 * Picks the preview image URL from the `og:image` meta property.
 *
 * @param {Array<Object>} metaProps - Meta entries passed through to getMetaPropContent.
 * @returns {*} The og:image content, or '' when it is missing or falsy.
 */
function resolveImageUrl(metaProps) {
  const found = getMetaPropContent(metaProps, 'og:image')
  return found ? found : ''
}
/**
 * Looks up the content stored under `propKey` in a list of meta entries.
 *
 * Entries are expected to be single-key objects such as
 * `{ 'og:title': 'Some title' }`, the shape extractMetaProps builds.
 * The first entry that owns a truthy value for `propKey` wins.
 *
 * @param {Array<Object>} metaProps - Entries to search; any non-array value is treated as empty.
 * @param {string} propKey - Property name to look for, e.g. 'og:image'.
 * @returns {*} The matching content, or '' when nothing matches.
 */
function getMetaPropContent(metaProps, propKey) {
  if (!Array.isArray(metaProps)) return ''
  const hasOwn = Object.prototype.hasOwnProperty
  const entry = metaProps.find((candidate) => {
    // Skip null/undefined entries rather than throwing on property access.
    if (candidate == null) return false
    // Own properties only, so keys like 'toString' cannot match members
    // inherited from Object.prototype.
    return hasOwn.call(candidate, propKey) && Boolean(candidate[propKey])
  })
  return entry ? entry[propKey] : ''
}
/**
 * Downloads `url` and returns the meta properties found in its HTML.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<Array<Object>>} Whatever extractMetaProps returns for the response body.
 * @throws {Error} When the response status is not OK. Errors raised by
 *   fetch or extractMetaProps propagate unchanged.
 */
async function getMetaProps(url) {
  const res = await fetch(url)
  if (!res.ok) {
    // Fail with a clear message instead of handing an undefined body to the
    // HTML parser.
    throw new Error(`Request for ${url} failed: ${res.status} ${res.statusText}`)
  }
  const html = await res.text()
  return extractMetaProps(html)
}
/**
 * Collects `property`/`content` attribute pairs from the <meta> tags
 * matched by the 'head meta' selector.
 *
 * @param {string} html - Markup handed to cheerio.load.
 * @returns {Array<Object>} Single-key objects `{ [property]: content }`,
 *   sorted by key using plain string comparison.
 */
function extractMetaProps(html) {
  const $ = cheerio.load(html)
  const entries = []

  $('head meta').each((_, node) => {
    const key = $(node).attr('property')
    const value = $(node).attr('content')
    // Tags lacking either attribute (or carrying an empty one) are ignored.
    if (!key || !value) return
    entries.push({ [key]: value })
  })

  // Every entry holds exactly one key, which serves as the sort key.
  const keyOf = (entry) => Object.keys(entry)[0]
  entries.sort((left, right) => {
    const leftKey = keyOf(left)
    const rightKey = keyOf(right)
    if (leftKey < rightKey) return -1
    return leftKey > rightKey ? 1 : 0
  })

  return entries
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Result