Created
April 26, 2019 02:26
-
-
Save xnuk/595f4de9631698694fa34731c891f48a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { readFileSync, writeFileSync } from 'fs'
import { parse, HTMLElement, TextNode } from 'node-html-parser'
// import { CssSelectorParser } from 'css-selector-parser'
/**
 * Combines two arrays element-wise.
 *
 * Calls `func(a[i], b[i])` for every index `i` below the length of the
 * shorter array and collects the results in index order. Surplus elements of
 * the longer array are ignored.
 *
 * @param a - Supplies the first argument of each `func` call.
 * @param b - Supplies the second argument of each `func` call.
 * @param func - Combiner applied to each aligned pair.
 * @returns One result per aligned pair; an empty array if either input is empty.
 */
const zipWith = <A, B, T>(a: readonly A[], b: readonly B[], func: (a: A, b: B) => T): T[] =>
  Array.from({ length: Math.min(a.length, b.length) }, (_, i) => func(a[i], b[i]))
// const parser = new CssSelectorParser()
// .registerSelectorPseudos('not', 'has', 'host', 'host-context', 'is', 'where')
// .registerAttrEqualityMods('^', '*', '~', '$')
// .registerNestingOperators('+', '~', '>')
// .disableSubstitutes()
// console.log(JSON.stringify(parser.parse(':a(a[href])')))
// http://www.kobis.or.kr/kobis/business/mast/thea/findTheaterInfoListXls.do
// Read the saved page synchronously as UTF-8 text. The path is relative to
// the process's working directory.
const html = readFileSync('./kobis.html', 'utf8')

// Parse the markup into a node tree with every optional parser flag
// (script, style, lowerCaseTagName, pre) switched off.
const el = parse(html, {
  script: false,
  style: false,
  lowerCaseTagName: false,
  pre: false
})
/**
 * Returns the raw text directly contained in a node.
 *
 * A text node yields its own `rawText`. An element yields the `rawText` of
 * each child that is itself a text node, joined with a single space; text
 * nested inside child elements is not included.
 *
 * @param v - The text node or element to read.
 * @returns The collected text, or an empty string for an element that has no
 *   direct text-node children.
 */
const getText = (v: HTMLElement | TextNode): string => {
  if (v instanceof TextNode) {
    return v.rawText
  }
  return v.childNodes
    .filter((child): child is TextNode => child instanceof TextNode)
    .map(child => child.rawText)
    .join(' ')
}
// Extract the theater table and write the open theaters to bar.json.
// Nothing happens when the parsed root does not expose `querySelector`.
if ('querySelector' in el) {
  const table = el.querySelector('table.tbl_exc')
  if (table == null) {
    throw new Error("kobis.html: no element matches 'table.tbl_exc'")
  }
  const thead = table.querySelector('thead')
  const tbody = table.querySelector('tbody')
  if (thead == null || tbody == null) {
    throw new Error("kobis.html: 'table.tbl_exc' is missing its thead or tbody")
  }

  // Header cell texts, in document order; they become the keys of each row object.
  const ths = thead.querySelectorAll('th').map(getText)
  // One string[] per body row, holding the text of each <td> in order.
  const trs = tbody.querySelectorAll('tr').map(v => v.querySelectorAll('td').map(getText))

  const data = trs
    // Pair each cell with the header at the same index: { [header]: cellText }.
    .map((row): Record<string, string> =>
      Object.assign({}, ...zipWith(ths, row, (k, v) => ({ [k]: v }))))
    // Keep only rows whose 영업상태 ("business status") column is exactly 영업 ("operating").
    .filter(row => row['영업상태'] === '영업')
    .map(row => ({
      code: row['영화상영관코드'], // "theater code" column
      name: row['영화상영관명'], // "theater name" column
      city: row['광역단체'], // "metropolitan/provincial government" column
      area: row['기초단체'], // "basic local government" column
    }))

  writeFileSync('bar.json', JSON.stringify(data))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment