Skip to content

Instantly share code, notes, and snippets.

@xnuk
Created July 30, 2018 14:51
Show Gist options
  • Save xnuk/0f2b62e1c1e9890a7c5822086b725212 to your computer and use it in GitHub Desktop.
Save xnuk/0f2b62e1c1e9890a7c5822086b725212 to your computer and use it in GitHub Desktop.
const {parse, SELECTOR, DATA, CONVERT} = require('crawl-it')
const http = require('http')
/**
 * Performs an HTTP GET and hands the complete response body to `cb`.
 *
 * @param {string|URL|Object} url - Anything `http.get` accepts as its first argument.
 * @param {function(string): void} cb - Called once with the whole body, decoded as
 *   UTF-8, when the response has status 200 and has ended.
 * @param {function(Error): void} [onError] - Called when the request fails or the
 *   status is not 200. Defaults to logging the error with `console.error`.
 * @returns {http.ClientRequest} The request object returned by `http.get`.
 */
const httpGet = (url, cb, onError = err => console.error(err)) => {
  const req = http.get(url, res => {
    if (res.statusCode !== 200) {
      // An unread response keeps holding its socket; discard the body to release it.
      res.resume()
      onError(new Error(`Request failed with status code ${res.statusCode}`))
      return
    }
    res.setEncoding('utf8')
    let body = ''
    res.on('data', chunk => {
      body += chunk
    })
    res.on('end', () => cb(body))
  })
  // Without a listener, a connection-level failure is an unhandled 'error' event
  // that takes the whole process down.
  req.on('error', onError)
  return req
}
// Literal fragments of the page's inline chart script, used to cut it into
// one piece per chart.
// The marker is a Korean source comment, roughly "create charts for the 7 substances".
const CHART_SECTION_MARKER = '//7가지 물질 차트생성'
const DATA_TABLE_CALL = 'new google.visualization.DataTable();'
const LINE_CHART_CALL = 'new google.visualization.LineChart('
const ADD_ROWS_CALL = 'data.addRows(['

// A quoted hour label (one or two digits followed by "시", the Korean hour
// suffix), a comma, a number made of digits and dots, and a trailing comma.
const HOURLY_ROW_PATTERN = /['"]([0-2]?[0-9])시['"]\s*,\s*([0-9\.]+)\s*,/
// The element id passed to getElementById(...) inside the LineChart constructor call.
const CHART_ELEMENT_ID_PATTERN = /getElementById\('([^']+)'\)\)/

/**
 * Extracts readings from the text that follows one `data.addRows([`.
 *
 * Only the text before the first `);` is examined; it is split on `[` and every
 * piece matching HOURLY_ROW_PATTERN yields one entry.
 *
 * @param {string} rowsSource - Script text starting right after `data.addRows([`.
 * @returns {Array<{hour: number, val: string}>} `hour` is coerced to an integer,
 *   `val` is left as the matched string.
 */
const parseHourlyRows = rowsSource => rowsSource
  .split(');')[0]
  .split('[')
  .map(row => row.match(HOURLY_ROW_PATTERN))
  .filter(match => match != null)
  .map(match => ({hour: match[1] | 0, val: match[2]}))

/**
 * Parses the script text of a single chart.
 *
 * @param {string} section - One piece of the script, as split on DATA_TABLE_CALL.
 * @returns {?{id: (string|undefined), data: Array<{hour: number, val: string}>}}
 *   `null` when the piece has no `data.addRows([` before a LineChart constructor
 *   call; `id` is `undefined` when no element id can be found after that call.
 */
const parseChartSection = section => {
  const [beforeRender, afterRender] = section.split(LINE_CHART_CALL)
  const rowsSource = beforeRender.split(ADD_ROWS_CALL)[1]
  if (rowsSource == null || afterRender == null) return null
  const idMatch = afterRender.match(CHART_ELEMENT_ID_PATTERN)
  return {
    id: idMatch ? idMatch[1] : undefined,
    data: parseHourlyRows(rowsSource)
  }
}

/**
 * Turns the inline chart script into a list of `{id, data}` records.
 *
 * @param {*} scriptText - Text of the script element. Anything that is not a
 *   string (for example when nothing was selected) yields an empty list
 *   instead of throwing.
 * @returns {Array<{id: (string|undefined), data: Array<{hour: number, val: string}>}>}
 */
const parseChartScript = scriptText => {
  if (typeof scriptText !== 'string') return []
  // Only the part after the section marker is parsed.
  const chartCode = scriptText.split(CHART_SECTION_MARKER)[1] || ''
  return chartCode
    .split(DATA_TABLE_CALL)
    .map(section => parseChartSection(section))
    .filter(chart => chart != null)
}

// Template handed to crawl-it's `parse`.
// NOTE(review): the meaning of the SELECTOR / DATA / CONVERT keys and of the
// `/text()` and `@id` selector suffixes is defined by crawl-it; confirm against its docs.
const parseTemplate = {
  meta: {
    [SELECTOR]: '#header>h1',
    where: '.tit/text()',
    when: '.tim'
  },
  grapes: [{
    [SELECTOR]: 'li[id^="jsAnchorDust"]',
    name: {
      [DATA]: './.tit',
      // Drops the first occurrence of the label "환경기준" ("environmental standard")
      // and trims the surrounding whitespace.
      [CONVERT]: v => v.replace(/환경기준/, '').trim()
    },
    id: '.graph2@id'
  }],
  // script parsing
  script: {
    [DATA]: 'body>script:last-of-type/text()',
    [CONVERT]: parseChartScript
  },
  /**
   * Attaches each chart's readings to the grape entry with the same id and
   * returns the `meta` fields together with the list of grapes.
   * The intermediate `script` list is not part of the result.
   */
  [CONVERT]: ({meta, grapes, script}) => {
    const grapesWithData = grapes.map(grape => {
      // An entry without an id must not pair up with a chart whose id could not be parsed.
      if (grape.id == null) return grape
      const chart = script.find(candidate => candidate.id === grape.id)
      return chart == null ? grape : {...grape, data: chart.data}
    })
    return {...meta, grapes: grapesWithData}
  }
}
// Request the page for one fixed coordinate, parse it with parseTemplate and
// print the result.
httpGet({
  hostname: 'm.airkorea.or.kr',
  // Hwanggan Station
  path: '/main?lat=36.2250075&lng=127.9120465',
  headers: {
    Accept: 'text/html;charset=utf-8',
    'Accept-Charset': 'utf-8',
    // User Agent is important. It should be given.
    'User-Agent': 'curl/7.54.0'
  }
}, body => {
  const result = parse(body, parseTemplate)
  // console.dir stops at two levels of nesting by default, which would print
  // each entry's `data` array as "[Array]"; lift the limit to show everything.
  console.dir(result, {depth: null})
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment