Skip to content

Instantly share code, notes, and snippets.

@xnuk
Created April 26, 2019 02:26
Show Gist options
  • Save xnuk/595f4de9631698694fa34731c891f48a to your computer and use it in GitHub Desktop.
Save xnuk/595f4de9631698694fa34731c891f48a to your computer and use it in GitHub Desktop.
import { readFileSync, writeFileSync } from 'fs'
import { parse, HTMLElement, TextNode } from 'node-html-parser'
// import { CssSelectorParser } from 'css-selector-parser'
/**
 * Combines two arrays element-wise.
 *
 * Walks both arrays in lockstep and calls `func` with the elements found at
 * the same index. The result has as many entries as the shorter input, so
 * surplus elements of the longer array are ignored.
 *
 * @param a - left-hand array
 * @param b - right-hand array
 * @param func - combiner invoked as `func(a[i], b[i])`
 * @returns the combined values, in index order
 */
const zipWith = <A, B, T>(a: A[], b: B[], func: (a: A, b: B) => T): T[] => {
  // The length is fixed up front, before any call to `func`.
  const size = Math.min(a.length, b.length)
  const combined: T[] = []
  for (let i = 0; i < size; i++) {
    combined.push(func(a[i], b[i]))
  }
  return combined
}
// const parser = new CssSelectorParser()
// .registerSelectorPseudos('not', 'has', 'host', 'host-context', 'is', 'where')
// .registerAttrEqualityMods('^', '*', '~', '$')
// .registerNestingOperators('+', '~', '>')
// .disableSubstitutes()
// console.log(JSON.stringify(parser.parse(':a(a[href])')))
// Presumably the page that ./kobis.html (read below) was saved from — verify:
// http://www.kobis.or.kr/kobis/business/mast/thea/findTheaterInfoListXls.do
// Read the saved page as UTF-8 text; the relative path is resolved against
// the directory the script is started from.
const html = readFileSync('./kobis.html', 'utf8')

// Parse the markup into a node tree. Every parser flag is switched off
// explicitly (see the node-html-parser documentation for what each one does).
const el = parse(
  html,
  { script: false, style: false, lowerCaseTagName: false, pre: false }
)
/**
 * Extracts the raw text of a parsed node.
 *
 * - For a text node, its own `rawText` is returned.
 * - For an element, the `rawText` of its direct text-node children is joined
 *   with a single space. Text inside nested elements is not included.
 *
 * NOTE(review): `rawText` is presumably the text as written in the markup
 * (no entity decoding or trimming) — confirm against node-html-parser.
 *
 * @param v - the element or text node to read
 * @returns the collected raw text
 */
const getText = (v: HTMLElement | TextNode): string => {
  if (v instanceof TextNode) return v.rawText

  const pieces: string[] = []
  for (const child of v.childNodes) {
    // Only immediate text children contribute; child elements are skipped.
    if (child instanceof TextNode) {
      pieces.push(child.rawText)
    }
  }
  return pieces.join(' ')
}
// Export the theater table of the parsed page to bar.json.
//
// The `in` check keeps only parse results that expose `querySelector`;
// anything else is skipped and no file is written.
if ('querySelector' in el) {
  // Fail with a descriptive message when the page layout is not the expected
  // one, instead of crashing on a property access of null further down.
  const table = el.querySelector('table.tbl_exc')
  if (!table) {
    throw new Error("kobis.html: no element matches 'table.tbl_exc'")
  }
  const thead = table.querySelector('thead')
  const tbody = table.querySelector('tbody')
  if (!thead || !tbody) {
    throw new Error("kobis.html: 'table.tbl_exc' is missing its <thead> or <tbody>")
  }

  // Header captions become the property names of every row record.
  const ths = thead.querySelectorAll('th').map(getText)

  // One string array per table row, one entry per <td>.
  const trs = tbody
    .querySelectorAll('tr')
    .map(v => v.querySelectorAll('td').map(getText))

  const data = trs
    // Pair each cell with the header at the same index. Cells without a
    // header (or headers without a cell) are dropped by zipWith.
    .map((row): Record<string, string> =>
      Object.assign({}, ...zipWith(ths, row, (k, v) => ({ [k]: v })))
    )
    // Keep rows whose "영업상태" (business status) is exactly "영업" (operating).
    .filter(row => row.영업상태 === '영업')
    .map(row => ({
      code: row.영화상영관코드, // theater code
      name: row.영화상영관명, // theater name
      city: row.광역단체, // metropolitan/provincial government
      area: row.기초단체, // basic local government (city/county/district)
    }))

  writeFileSync('bar.json', JSON.stringify(data))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment