Created
March 29, 2020 21:35
-
-
Save zHaytam/7ce777a1346211bd21c58b9229e837ec to your computer and use it in GitHub Desktop.
A simple scraper for covidmaroc.ma
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party dependencies: axios performs the HTTP request, cheerio parses the returned HTML.
const axios = require('axios');
const cheerio = require('cheerio');
// Page that is downloaded and scraped for the statistics tables.
const covidmarocUrl = 'http://www.covidmaroc.ma/Pages/AccueilAR.aspx';

// Browser User-Agent string sent with the request instead of the HTTP client's default.
const chromeUserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36';

// Divisor for the per-million figures.
// NOTE(review): presumably Morocco's population expressed in millions - confirm the value and its source.
const moroccoPopulation = 36.79;
/**
 * Normalizes a piece of scraped text.
 *
 * Removes every zero-width space (U+200B) and trims leading/trailing
 * whitespace. `null` and `undefined` are treated as an empty string instead of
 * raising a TypeError; any other non-string value is converted with `String()`.
 *
 * @param {string} text - Raw text taken from the page.
 * @returns {string} The cleaned text.
 */
function cleanText(text) {
  // `== null` deliberately matches both null and undefined.
  const value = text == null ? '' : String(text);
  return value.replace(/\u200B/g, '').trim();
}
/**
 * Parses the leading base-10 integer out of scraped text.
 *
 * The text is first passed through `cleanText`. The radix is fixed to 10 so
 * that input such as "0x1A" is never interpreted as hexadecimal.
 *
 * @param {string} text - Raw text expected to start with an integer.
 * @returns {number} The parsed integer, or `NaN` when the cleaned text does
 *   not start with a decimal integer.
 */
function safeParseInt(text) {
  return Number.parseInt(cleanText(text), 10);
}
/**
 * Reads the country-wide figures from the second row of the given table.
 *
 * Cell layout as read by this function:
 *   - 1st cell: recovered count inside a `font` element and deaths count
 *     inside a `span` element
 *   - 2nd cell: total count
 *   - 3rd cell: negative count
 *
 * @param {object} $table - Cheerio selection wrapping the statistics table.
 * @returns {{active: number, recovered: number, deaths: number, total: number,
 *   negative: number, casesPerMil: number, deathsPerMil: number}} The parsed
 *   figures. Any value that cannot be parsed is `NaN`, and so is every figure
 *   derived from it.
 */
function extractGlobalStats($table) {
  const row = $table.find('tbody>tr:nth-child(2)');
  const recoveredAndDeathsCell = row.find('td:nth-child(1)');

  const recovered = safeParseInt(recoveredAndDeathsCell.find('font').text());
  const deaths = safeParseInt(recoveredAndDeathsCell.find('span').text());
  const total = safeParseInt(row.find('td:nth-child(2)').text());
  const negative = safeParseInt(row.find('td:nth-child(3)').text());

  return {
    // Active cases are whatever remains once recoveries and deaths are removed.
    active: total - recovered - deaths,
    recovered,
    deaths,
    total,
    negative,
    // Per-million figures divide by the module-level moroccoPopulation constant.
    casesPerMil: total / moroccoPopulation,
    deathsPerMil: deaths / moroccoPopulation,
  };
}
/**
 * Builds the per-city case list from the given table.
 *
 * The first row is skipped. For every remaining row the city name is read
 * from its `th` cell and the case count from its `td` cell(s). Rows whose
 * count is not greater than zero (including unparsable counts, which are
 * `NaN`) are left out.
 *
 * @param {Function} $ - Cheerio root function used to wrap each row element.
 * @param {object} $table - Cheerio selection wrapping the per-city table.
 * @returns {Array<{city: string, cases: number}>} One entry per city with at
 *   least one case, in table order.
 */
function extractCasesByCity($, $table) {
  return $table
    .find('tbody>tr')
    .get()
    // Index 0 is skipped; the original code filtered out index 0 as well.
    .slice(1)
    .map((tr) => {
      const $row = $(tr);
      return {
        city: cleanText($row.find('th').text()),
        cases: safeParseInt($row.find('td').text()),
      };
    })
    .filter((entry) => entry.cases > 0);
}
/**
 * Downloads the page at `covidmarocUrl` and loads it into cheerio.
 *
 * @returns {Promise<Function>} Resolves to the cheerio root function for the
 *   downloaded document. Rejects if the `axios.get` call rejects.
 */
async function getWebsiteContent() {
  // A browser User-Agent is sent to bypass the 403 the site otherwise returns.
  const requestOptions = {
    headers: {
      'User-Agent': chromeUserAgent,
    },
  };
  const response = await axios.get(covidmarocUrl, requestOptions);
  return cheerio.load(response.data);
}
/**
 * Downloads the page and extracts both the country-wide figures and the
 * per-city case list.
 *
 * The first `table` element on the page is passed to `extractGlobalStats` and
 * the second one to `extractCasesByCity`.
 *
 * @returns {Promise<{globalStats: object, casesByCity: Array}>} The scraped
 *   statistics. Rejects if the download fails.
 */
async function getStats() {
  // Declared locally: assigning to an undeclared `$` would create a shared
  // global in sloppy mode and throw a ReferenceError in strict mode.
  const $ = await getWebsiteContent();
  const tables = $('table');

  const globalStats = extractGlobalStats($(tables[0]));
  const casesByCity = extractCasesByCity($, $(tables[1]));

  return {
    globalStats,
    casesByCity,
  };
}
// Public API of this module: only getStats is exported.
module.exports.getStats = getStats;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment