This Gist was automatically created by Carbide, a free online programming environment.
Last active
September 17, 2020 05:14
-
-
Save bijection/50de8e5dbd8cc2ffd3c396aab3d0256d to your computer and use it in GitHub Desktop.
GRAIN Data Cleaning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import './system_config.js' | |
var xlsx = require('xlsx') /// Let's start by defining a function that can get us an xls file remotely. | |
export default function getXls(url){ | |
var f = new FileReader() | |
fetch(url) | |
.then(x => x.blob()) | |
.then(x => f.readAsBinaryString(x)) | |
return new Promise((resolve, reject) => { | |
f.onloadend = () => resolve(xlsx.read(f.result, {type:"binary"})) | |
}) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import getXls from './cell0.js'; | |
window.sheet; /// Now, we grab the file and stick it in a global variable 'sheet' | |
var res = await getXls('https://commondatastorage.googleapis.com/ckannet-storage/2012-08-14T085537/GRAIN---Land-grab-deals---Jan-2012.xls') | |
window.sheet = res.Sheets.Sheet1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
window.cols = {} /// 'sheet' is a giant object right now, but we'd really like it as a json list. to get it there, let's start by extracting the names of the columns. | |
Object.keys(window.sheet) | |
.filter(key => key.slice(1) === '1') | |
.forEach(key => cols[key[0]] = sheet[key].v) | |
window.cols |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
window.json = [] /// Now that we have the column names, let's go ahead and fill in the rest of our json list. | |
Object.keys(window.sheet).forEach(key => { | |
var row = Number(key.slice(1)) | |
if(!row || row < 2) return; | |
var rowIndex = row - 2 | |
var colIndex = key[0] | |
json[rowIndex] = json[rowIndex] || {} | |
json[rowIndex][window.cols[colIndex]] = window.sheet[key].v | |
}) | |
json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var investmentRules = [] /// Now let's normalize the invenstments! | |
window.normalizeInvestment = function normalizeInvestment(investment){ | |
for (var rule of investmentRules){ | |
if(rule(investment)) return rule(investment) | |
} | |
return false | |
} | |
function normalized(){ | |
return json.map(row => row['Projected investment']) | |
.filter(x=>x) | |
.map(normalizeInvestment) | |
} | |
function stillAbnormal(){ | |
return json.map(row => row['Projected investment']) | |
.filter(x=>x) | |
.filter(x => !normalizeInvestment(x)) | |
} | |
stillAbnormal() | |
investmentRules.push( investment => { | |
var match = investment.match(/^US\$(\d+(\.\d+)?)\s+million/) | |
return match ? Number(match[1]) : false | |
}) | |
stillAbnormal() | |
investmentRules.push( investment => { | |
var match = investment.match(/^US\$(\d)+,(\d+)\s+million/) | |
return match ? Number(match[1] + match[2]) : false | |
}) | |
stillAbnormal() | |
investmentRules.push( investment => { | |
var match = investment.match(/^US\$(\d+(\.\d+)?)\s+billion/) | |
return match ? Number(match[1])*1000 : false | |
}) | |
stillAbnormal().join('\n') | |
json.filter(row => row['Projected investment'] && row['Projected investment'].match(/\/yr/)) | |
investmentRules.push( investment => { /// Let's do some special cases!! Woohoo!! | |
switch(investment){ | |
case "US$30-35 million": | |
return 32 | |
case "US$4/ha/yr (lease)": | |
return 7 /// $35 yrs \*50000 ha | |
case "US$57,600 (US$0.80/ha)": | |
return .0576 | |
case "US$125,000/yr (land lease)": | |
return 3 /// $125,000 a year for 25 years | |
case "US$8/ha/yr (lease)": | |
return 5.4 | |
case "US$1.2/ha/yr (after first 7 years) in Gambela and US$8/ha/yr (after first 6 years) in Bako": | |
return 10 /// I am not patient enough to figure this one out lol. Hopefully it's around 10 | |
} | |
return false | |
}) | |
stillAbnormal() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
window.cleaned = [] | |
window.json.forEach(row => { | |
var investment = row['Projected investment'] | |
var cleaned_row = {...row} | |
delete cleaned_row['Projected investment'] | |
cleaned_row['Projected Investment'] = 'unknown' | |
if(investment) { | |
cleaned_row['Projected Investment'] = normalizeInvestment(investment) | |
} | |
cleaned.push(cleaned_row) | |
}) | |
window.cleaned |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import {say} from 'cowsay' | |
say({text: "hello there!\n \nWelcome to the Carbide Alpha\nRelease Data Cleaning\nExample Notebook!\n \nJoin us for an interactive\njourney through an excel file!\n\nRun each of the following\ncells in order!\n \nSend bugs to\[email protected]!\n \nSend hellos to \[email protected]! "}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
System.config({ | |
"meta": { | |
'https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.8.0/xlsx.full.min.js': { | |
exports: 'XLSX', | |
format: 'global' | |
} | |
}, | |
"map": { | |
"xlsx": 'https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.8.0/xlsx.full.min.js' | |
} | |
}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment