Created
July 9, 2017 04:16
-
-
Save Ravenstine/fb52db94a5f5eef2daf05cb9e4dfd8a4 to your computer and use it in GitHub Desktop.
whuuuut
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
const cheerio = require('cheerio'); | |
const YAML = require('js-yaml'); | |
/**
 * Coercion helpers for values that come back from the YAML-based table parser.
 */
const TYPE = {
  /**
   * Turns a value the YAML parser mistook for a mapping back into a string.
   *
   * Sometimes a valid multi-line string containing colons looks like a
   * key/value pair to the YAML parser. For such values this rebuilds a string
   * of the form `key:\nvalue` from each entry of the parsed object.
   *
   * @param {*} obj - Parsed value; may be a string, an object, null or undefined.
   * @returns {*} The rebuilt string when `obj` is a non-null object; otherwise
   *   `obj` itself, unchanged.
   */
  string(obj) {
    // `typeof null` is 'object' too; pass null through rather than letting
    // Object.keys(null) throw a TypeError.
    if (obj === null || typeof obj !== 'object') {
      return obj;
    }

    // Entries are concatenated without any separator between them.
    return Object.keys(obj)
      .map((key) => `${key}:\n${obj[key]}`)
      .join('');
  },
};
/**
 * Converts the tag-stripped text of a CalFire incident table into a parsed
 * structure by treating the text as YAML.
 *
 * This is a deliberate shortcut: instead of walking the DOM or writing
 * regular expressions, it relies on the observation that the stripped text
 * has keys ending in a colon with their values almost always on the next
 * line, which is close enough to YAML to hand to the YAML parser after a
 * little cleanup. It is used for the index/results page as well as for the
 * table on individual incident pages.
 *
 * @param {string} text - Text content of the table with HTML tags removed.
 * @returns {*} Whatever `YAML.safeLoad` produces for the cleaned-up text.
 */
function tableTextToJson(text) {
  const yamlLines = [];

  // Splitting on end-of-line positions keeps the line break at the start of
  // each following piece; it disappears when the piece is trimmed below.
  for (const piece of text.split(/$/m)) {
    // Escape an asterisk that opens the line so it is not a bare leading token.
    const escaped = piece.replace(/^\s*(\*)/, '\\*');
    const content = escaped.trim();
    const endsWithColon = escaped.match(/:\s*$/mg) !== null;

    // Lines ending in a colon are keys and stay flush left; every other line
    // is a value and gets a one-space indent.
    if (endsWithColon) {
      yamlLines.push(content);
    } else {
      yamlLines.push(` ${content}`);
    }
  }

  return YAML.safeLoad(yamlLines.join('\n'));
}
/**
 * Parses the HTML of a CalFire incident page (or a single result entry) and
 * copies the extracted fields onto an incident object.
 *
 * The incident id and name are read from the markup; every other field comes
 * from the text of `table#incident_information` as parsed by
 * `tableTextToJson`. Fields that cannot be found are set to `undefined`.
 *
 * @param {string} html - HTML markup to parse.
 * @param {Object} [incident={}] - Existing incident to augment. It is mutated
 *   in place; existing `stats` and `location` objects are reused.
 * @returns {Object} The same `incident` object with the parsed fields assigned.
 */
function parseIncident(html, incident = {}) {
  // Returns the first capture group of `pattern` in `value`, or undefined
  // when `value` is missing, is not a string, or does not match.
  const capture = (value, pattern) => {
    const subject = typeof value === 'string' ? value : '';
    return (subject.match(pattern) || [])[1];
  };

  const $ = cheerio.load(html);

  const incidentHref = $('.list_decorative_blue a, .list_decorative_dark a')
    .first()
    .attr('href');
  incident.id = capture(incidentHref, /\?incident_id=(\d+)/);
  incident.name = $('h3.incident_h3').text();

  // If the table is absent, the parser may not return a mapping; fall back to
  // an empty one so the lookups below yield undefined instead of throwing.
  const tableText = $('table#incident_information').text();
  const json = tableTextToJson(tableText) || {};

  incident.updatedAt = json['Last Updated'] ? new Date(json['Last Updated']) : undefined;
  incident.reportedAt = json['Date/Time Started'] ? new Date(json['Date/Time Started']) : undefined;
  incident.administrativeUnit = json['Administrative Unit'];
  incident.cooperatingAgencies = json['Cooperating Agencies'];

  // Letters and whitespace only: the previous `A-z` range also matched
  // `[ \ ] ^ _` and the backtick, and `|` inside a class is a literal pipe.
  const county = capture(json['County'], /([A-Za-z\s]+) County/i);
  incident.county = county === undefined ? undefined : county.trim();

  incident.roadClosures = TYPE.string(json['Road Closures']);
  incident.schoolClosures = TYPE.string(json['School Closures']);
  incident.evacuations = TYPE.string(json['Evacuation Info'] || json['Evacuations']);
  incident.injuries = json['Injuries'];
  incident.phoneNumbers = json['Phone Numbers'];

  incident.stats = incident.stats || {};
  const containment = json['Acres Burned - Containment'];

  // Accept thousands separators ("1,500 acres") and strip them, so the whole
  // figure is captured rather than only the digits after the last comma.
  const acres = capture(containment, /(\d[\d,]*) acres/i);
  incident.stats.acres = acres === undefined ? undefined : acres.replace(/,/g, '');

  // An explicit "Estimated - Containment" entry wins over the percentage
  // embedded in the acres line.
  incident.stats.contained = json['Estimated - Containment']
    ? json['Estimated - Containment']
    : capture(containment, /(\d{1,3}%) contained/i);

  incident.stats.cause = json['Cause'];
  incident.stats.airtankers = json['Total Airtankers'];
  incident.stats.bulldozers = json['Total Dozers'];
  incident.stats.fireCrews = json['Total Fire Crews'];
  incident.stats.fireEngines = json['Total Fire Engines'];
  incident.stats.firefighters = json['Total Fire Personnel'];
  incident.stats.helicopters = json['Total Helicopters'];
  incident.stats.structuresDestroyed = json['Structures Destroyed'];
  incident.stats.structuresThreatened = json['Structures Threatened'];
  incident.stats.waterTenders = json['Total Water Tenders'];

  incident.location = incident.location || {};
  incident.location.description = TYPE.string(json['Location']);

  // The source field is "Long/Lat": group 1 is the longitude, group 2 the
  // latitude. Values are kept as the matched strings.
  const longLat = typeof json['Long/Lat'] === 'string' ? json['Long/Lat'] : '';
  const coordinates = longLat.match(/(\-*\d+\.\d+)\/(\-*\d+\.\d+)/) || [];
  incident.location.coordinates = {
    lat: coordinates[2],
    lng: coordinates[1],
  };

  return incident;
}
module.exports = parseIncident; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment