Skip to content

Instantly share code, notes, and snippets.

@Ravenstine
Created July 9, 2017 04:16
Show Gist options
  • Save Ravenstine/fb52db94a5f5eef2daf05cb9e4dfd8a4 to your computer and use it in GitHub Desktop.
Save Ravenstine/fb52db94a5f5eef2daf05cb9e4dfd8a4 to your computer and use it in GitHub Desktop.
whuuuut
'use strict';
const cheerio = require('cheerio');
const YAML = require('js-yaml');
const TYPE = {
  /**
   * Coerces a parsed YAML value back into the text it most likely came from.
   *
   * Sometimes, when using the YAML parser, a valid multi-line string that
   * contains colons is mistaken for a set of key/value pairs and comes back
   * as an object. This rebuilds a string from such an object, one
   * `key:\nvalue` entry per pair, with entries separated by a newline.
   *
   * @param {*} obj - Value taken from the parsed YAML.
   * @returns {*} The rebuilt string when `obj` is a non-null object;
   *   otherwise `obj` itself, unchanged (strings, numbers, undefined, null).
   */
  string(obj) {
    // `typeof null` is 'object', so null must be ruled out explicitly before
    // Object.keys() is called on it.
    if (obj === null || typeof obj !== 'object') {
      return obj;
    }
    // Join with a newline so one entry's value does not run into the next
    // entry's key.
    return Object.keys(obj)
      .map((key) => `${key}:\n${obj[key]}`)
      .join('\n');
  },
};
/**
 * Turns the tag-stripped text of a CalFire incident table into a parsed
 * structure by reshaping it into YAML-like text and running the YAML parser.
 *
 * This is a deliberate shortcut: rather than walking the DOM or writing
 * regular expressions per field, it relies on the stripped text already
 * looking like YAML -- keys consistently end with a colon and their values
 * almost always sit on the following line. The same approach is used for the
 * index/results page and for the table on individual incident pages.
 *
 * @param {string} text - Text content of the table, with HTML tags removed.
 * @returns {*} Whatever the YAML parser produces for the reshaped text.
 */
function tableTextToJson(text){
  const yamlLines = [];
  // Splitting on /$/m cuts at every end-of-line position, so each chunk after
  // the first still starts with its newline; trim() below removes it.
  for (const rawChunk of text.split(/$/m)) {
    // A leading `*` (after optional whitespace) is rewritten as `\*` so the
    // line no longer begins with a bare asterisk.
    const chunk = rawChunk.replace(/^\s*(\*)/, '\\*');
    const stripped = chunk.trim();
    if (chunk.match(/:\s*$/mg)) {
      // Ends with a colon: treat as a key and keep it flush left.
      yamlLines.push(stripped);
    } else {
      // Anything else is treated as a value line and indented by one space.
      yamlLines.push(` ${stripped}`);
    }
  }
  return YAML.safeLoad(yamlLines.join('\n'));
}
/**
 * Extracts incident details from a chunk of CalFire incident HTML.
 *
 * The incident id is read from the first matching list link, the name from
 * `h3.incident_h3`, and everything else from the text of
 * `table#incident_information` after it has been run through
 * `tableTextToJson`. Fields that cannot be found are left `undefined`.
 *
 * @param {string} html - HTML markup to parse.
 * @param {Object} [incident={}] - Object to populate. It is mutated in place;
 *   existing `stats` and `location` sub-objects are reused if present.
 * @returns {Object} The same `incident` object, populated.
 */
function parseIncident(html, incident={}){
  // Only strings can be pattern-matched; anything else (missing field, or a
  // value the YAML parser turned into an object/number) is treated as empty.
  const text = (value) => (typeof value === 'string' ? value : '');
  // First capture group of `pattern` in `value`, or undefined when no match.
  const firstGroup = (value, pattern) => (text(value).match(pattern) || [])[1];

  const $ = cheerio.load(html);
  const href = $('.list_decorative_blue a, .list_decorative_dark a').first().attr('href');
  incident.id = firstGroup(href, /\?incident_id=(\d+)/);
  incident.name = $('h3.incident_h3').text();

  const tableText = $('table#incident_information').text();
  const parsed = tableTextToJson(tableText);
  // Guard against the parser returning a non-object (e.g. nothing usable in
  // the table text), which would otherwise make every lookup below throw.
  const json = (parsed !== null && typeof parsed === 'object') ? parsed : {};

  incident.updatedAt = json['Last Updated'] ? new Date(json['Last Updated']) : undefined;
  incident.reportedAt = json['Date/Time Started'] ? new Date(json['Date/Time Started']) : undefined;
  incident.administrativeUnit = json['Administrative Unit'];
  incident.cooperatingAgencies = json['Cooperating Agencies'];
  // Letters and whitespace only; the former `[A-z|\s]` class also admitted
  // `[`, `\`, `]`, `^`, `_`, a backtick and a literal `|`.
  incident.county = firstGroup(json['County'], /([A-Za-z\s]+) County/i);
  incident.roadClosures = TYPE.string(json['Road Closures']);
  incident.schoolClosures = TYPE.string(json['School Closures']);
  incident.evacuations = TYPE.string(json['Evacuation Info'] || json['Evacuations']);
  incident.injuries = json['Injuries'];
  incident.phoneNumbers = json['Phone Numbers'];

  incident.stats = incident.stats || {};
  const burned = json['Acres Burned - Containment'];
  // Accept thousands separators ("4,016 acres") and strip them, so the
  // result stays a digits-only string instead of just the trailing group.
  const acres = firstGroup(burned, /(\d[\d,]*) acres/i);
  incident.stats.acres = acres === undefined ? undefined : acres.replace(/,/g, '');
  // A dedicated 'Estimated - Containment' field, when present, takes
  // precedence over the percentage embedded in the acres line.
  incident.stats.contained = json['Estimated - Containment']
    ? json['Estimated - Containment']
    : firstGroup(burned, /(\d{1,3}%) contained/i);
  incident.stats.cause = json['Cause'];
  incident.stats.airtankers = json['Total Airtankers'];
  incident.stats.bulldozers = json['Total Dozers'];
  incident.stats.fireCrews = json['Total Fire Crews'];
  incident.stats.fireEngines = json['Total Fire Engines'];
  incident.stats.firefighters = json['Total Fire Personnel'];
  incident.stats.helicopters = json['Total Helicopters'];
  incident.stats.structuresDestroyed = json['Structures Destroyed'];
  incident.stats.structuresThreatened = json['Structures Threatened'];
  incident.stats.waterTenders = json['Total Water Tenders'];

  incident.location = incident.location || {};
  incident.location.description = TYPE.string(json['Location']);
  // The source field is 'Long/Lat', so capture 1 is the longitude and
  // capture 2 the latitude. Values are kept as the matched strings.
  const coordinates = text(json['Long/Lat']).match(/(\-*\d+\.\d+)\/(\-*\d+\.\d+)/) || [];
  incident.location.coordinates = {
    lat: coordinates[2],
    lng: coordinates[1]
  };
  return incident;
}
module.exports = parseIncident;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment