Skip to content

Instantly share code, notes, and snippets.

@xnuk
Created April 26, 2019 09:37
Show Gist options
  • Save xnuk/3ed5e3868c2eccd5b78b56d41aa184cc to your computer and use it in GitHub Desktop.
Save xnuk/3ed5e3868c2eccd5b78b56d41aa184cc to your computer and use it in GitHub Desktop.
import { parse, HTMLElement, TextNode } from 'node-html-parser'
import { get } from 'http'
/**
 * One theater record as produced by `parser` from a row of the scraped table.
 * All values are the raw cell text; no normalization is applied.
 */
export interface Theater {
/** Text of the `영화상영관코드` (theater code) column. */
code: string
/** Text of the `영화상영관명` (theater name) column. */
name: string
/** Text of the `광역단체` (upper-level / metropolitan region) column. */
city: string
/** Text of the `기초단체` (lower-level / local district) column. */
area: string
}
/**
 * Combines two arrays pairwise with `func`.
 *
 * The result has as many elements as the shorter input; surplus elements of
 * the longer array are ignored.
 *
 * @param a - Left-hand values.
 * @param b - Right-hand values.
 * @param func - Combiner invoked once per index with `a[i]` and `b[i]`.
 * @returns The combined values, in index order.
 */
const zipWith = <A, B, T>(a: A[], b: B[], func: (a: A, b: B) => T): T[] => {
  const size = Math.min(a.length, b.length)
  const zipped: T[] = []
  for (let i = 0; i < size; i += 1) {
    zipped.push(func(a[i], b[i]))
  }
  return zipped
}
/**
 * Returns the raw text carried by a parsed node.
 *
 * A text node yields its own `rawText`. An element yields the `rawText` of
 * its direct text-node children joined with a single space; text nested
 * inside child elements is not included.
 *
 * @param v - Node to read text from.
 * @returns The collected raw text.
 */
const getText = (v: HTMLElement | TextNode): string => {
  if (!(v instanceof TextNode)) {
    const pieces: string[] = []
    for (const child of v.childNodes) {
      // Only direct text children contribute; nested elements are skipped.
      if (child instanceof TextNode) {
        pieces.push(child.rawText)
      }
    }
    return pieces.join(' ')
  }
  return v.rawText
}
/**
 * Extracts the theaters that are currently in business from the listing markup.
 *
 * The markup is expected to contain a `table.tbl_exc` whose `thead` holds the
 * column titles and whose `tbody` holds one `tr` per theater. Each row is
 * turned into a record keyed by column title, rows whose `영업상태` (business
 * status) is not exactly `영업` (operating) are dropped, and the remaining
 * rows are mapped onto {@link Theater}.
 *
 * @param html - Markup of the theater listing.
 * @returns The operating theaters, or an empty array when the markup does not
 *   contain the expected table, header or body.
 */
const parser = (html: string): Theater[] => {
  const el = parse(html, {
    script: false,
    style: false,
    lowerCaseTagName: false,
    pre: false
  })
  if (el instanceof TextNode) return []

  // The listing table may be absent (error page, changed markup); treat that
  // the same as "nothing to parse" instead of dereferencing a missing node.
  const table = el.querySelector('table.tbl_exc')
  if (!table) return []
  const thead = table.querySelector('thead')
  const tbody = table.querySelector('tbody')
  if (!thead || !tbody) return []

  // Column titles; they become the keys of each row record.
  const ths = thead.querySelectorAll('th').map(getText)
  // One string[] of cell texts per table row.
  const trs = tbody.querySelectorAll('tr')
    .map(v => v.querySelectorAll('td').map(getText))

  return trs
    // Pair titles with cells; zipWith stops at the shorter of the two, so a
    // short row simply lacks the trailing columns.
    .map((row): Record<string, string> =>
      Object.assign({}, ...zipWith(ths, row, (k, v) => ({ [k]: v }))))
    .filter(row => row.영업상태 === '영업')
    .map(row => ({
      code: row.영화상영관코드,
      name: row.영화상영관명,
      city: row.광역단체,
      area: row.기초단체,
    }))
}
// Endpoint fetched by `request`. Its body is handed to `parser`, which reads it as HTML.
// NOTE(review): the path ends in "Xls.do", so this is presumably an Excel-export endpoint that serves an HTML table — confirm.
const url = 'http://www.kobis.or.kr/kobis/business/mast/thea/findTheaterInfoListXls.do'
// Browser-like User-Agent header value sent with the request.
const firefox = 'Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0'
/**
 * Downloads the theater listing from `url` with a browser-like User-Agent.
 *
 * @returns A promise resolving to the response body decoded as UTF-8. It
 *   rejects when the request fails, when the response status is not 2xx, or
 *   when the response stream errors while the body is being read.
 */
const request = () => new Promise<string>((ok, err) =>
  get(url, { headers: { 'User-Agent': firefox } }, res => {
    const status = res.statusCode
    if (status === undefined || status < 200 || status >= 300) {
      // Drain the unwanted body so the socket is released, then fail loudly
      // rather than handing an error page to the caller as if it were data.
      res.resume()
      err(new Error(`Request to ${url} failed with HTTP status ${status}`))
      return
    }
    res.setEncoding('utf8')
    let data = ''
    res.on('data', (ch: string) => { data += ch })
    res.on('end', () => ok(data))
    // A failure while streaming the body would otherwise leave the promise
    // pending forever.
    res.on('error', e => err(e))
  }).on('error', e => err(e))
)
/**
 * Fetches the theater listing, parses it, and prints the resulting
 * theaters to standard output as a single JSON string.
 *
 * @returns A promise that settles once the JSON has been logged; it rejects
 *   if the request or the parsing fails.
 */
export const download = async () => {
  const html = await request()
  const theaters = parser(html)
  console.log(JSON.stringify(theaters))
}
// Run the download only when this file is executed directly, not when it is
// imported as a module.
if (require.main === module) {
  // Handle the rejection explicitly so a failed run is reported and ends with
  // a non-zero exit code instead of becoming an unhandled promise rejection.
  download().catch((e: unknown) => {
    console.error(e)
    process.exitCode = 1
  })
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment