Created
April 26, 2019 09:37
-
-
Save xnuk/3ed5e3868c2eccd5b78b56d41aa184cc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { parse, HTMLElement, TextNode } from 'node-html-parser' | |
import { get } from 'http' | |
/**
 * One theater entry as produced by `parser`.
 *
 * Every field is the text of one column of the parsed listing table.
 */
export interface Theater {
  /** Text of the `영화상영관코드` column. */
  code: string
  /** Text of the `영화상영관명` column. */
  name: string
  /** Text of the `광역단체` column. */
  city: string
  /** Text of the `기초단체` column. */
  area: string
}
/**
 * Combines two arrays element by element.
 *
 * Calls `func(a[i], b[i])` for every index that exists in both arrays and
 * collects the results; surplus elements of the longer array are ignored.
 *
 * @param a - left-hand values
 * @param b - right-hand values
 * @param func - combiner applied to each aligned pair
 * @returns one result per aligned pair, `min(a.length, b.length)` in total
 */
const zipWith = <A, B, T>(
  a: readonly A[],
  b: readonly B[],
  func: (a: A, b: B) => T,
): T[] => {
  const length = Math.min(a.length, b.length)
  return Array.from({ length }, (_, i) => func(a[i], b[i]))
}
/**
 * Extracts the raw text directly owned by a parsed node.
 *
 * A `TextNode` yields its own `rawText`. For an element, only its immediate
 * `TextNode` children contribute (text inside nested elements is not
 * visited); their `rawText` values are joined with a single space.
 *
 * @param v - node to read text from
 * @returns the collected raw text, or `''` when an element has no direct text
 */
const getText = (v: HTMLElement | TextNode): string => {
  if (v instanceof TextNode) {
    return v.rawText
  }

  const pieces: string[] = []
  for (const child of v.childNodes) {
    // Narrow with instanceof before touching rawText so the access is
    // checked against TextNode rather than the generic child node type.
    if (child instanceof TextNode) {
      pieces.push(child.rawText)
    }
  }
  return pieces.join(' ')
}
/**
 * Extracts the currently operating theaters from the listing HTML.
 *
 * The document is expected to contain a `table.tbl_exc` whose `thead` cells
 * name the columns and whose `tbody` rows hold one theater each. Every row is
 * turned into a record keyed by header text; only rows whose `영업상태`
 * column equals `영업` are kept and mapped onto `Theater`.
 *
 * @param html - markup of the listing page
 * @returns the matching theaters; `[]` when the markup does not contain the
 *          expected table, header or body
 */
const parser = (html: string): Theater[] => {
  const el = parse(html, {
    script: false,
    style: false,
    lowerCaseTagName: false,
    pre: false
  })
  if (el instanceof TextNode) return []

  // querySelector has no match on empty or unexpected markup (for example an
  // error page); treat that like the non-element root above instead of
  // crashing on a property access of a missing node.
  const table = el.querySelector('table.tbl_exc')
  if (!table) return []
  const thead = table.querySelector('thead')
  const tbody = table.querySelector('tbody')
  if (!thead || !tbody) return []

  const ths = thead.querySelectorAll('th').map(getText)
  const trs = tbody
    .querySelectorAll('tr')
    .map(tr => tr.querySelectorAll('td').map(getText))

  return trs
    // Pair each cell with its column header: [{header: cell}, ...] merged
    // into a single record per row.
    .map((row): Record<string, string> =>
      Object.assign({}, ...zipWith(ths, row, (k, v) => ({ [k]: v }))))
    .filter(row => row['영업상태'] === '영업')
    .map(row => ({
      code: row['영화상영관코드'],
      name: row['영화상영관명'],
      city: row['광역단체'],
      area: row['기초단체'],
    }))
}
/** Endpoint fetched by `request`; its response body is handed to `parser` as HTML. */
const url = 'http://www.kobis.or.kr/kobis/business/mast/thea/findTheaterInfoListXls.do'
/** Value sent as the `User-Agent` request header by `request`. */
const firefox = 'Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0'
/**
 * Downloads `url` with the `firefox` User-Agent and resolves with the
 * response body decoded as UTF-8.
 *
 * @returns the full response body
 * @throws (rejects) when the request fails, when the response stream errors,
 *         or when the server answers with a status outside 200-299
 */
const request = (): Promise<string> =>
  new Promise<string>((resolve, reject) => {
    const req = get(url, { headers: { 'User-Agent': firefox } }, res => {
      const { statusCode = 0 } = res
      if (statusCode < 200 || statusCode >= 300) {
        // Discard the body so the socket is released, and fail here rather
        // than handing an error or redirect page to the HTML parser.
        res.resume()
        reject(new Error(`GET ${url} failed with HTTP status ${statusCode}`))
        return
      }

      res.setEncoding('utf8')
      let data = ''
      res.on('data', (ch: string) => { data += ch })
      res.on('end', () => resolve(data))
      // A stream failure mid-body would otherwise leave the promise pending.
      res.on('error', reject)
    })
    req.on('error', reject)
  })
/**
 * Fetches the listing via `request`, runs it through `parser` and prints the
 * resulting theaters to stdout as one line of JSON.
 *
 * @returns a promise that settles once the JSON has been written to the console
 * @throws (rejects) with whatever `request` or `parser` raised
 */
export const download = async (): Promise<void> => {
  const html = await request()
  console.log(JSON.stringify(parser(html)))
}
// Run the download only when this file is executed directly, not when it is
// imported as a module.
if (require.main === module) {
  // download() returns a promise; report a failure and signal it through the
  // exit code instead of leaving an unhandled rejection.
  download().catch((e: unknown) => {
    console.error(e)
    process.exitCode = 1
  })
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment