Last active
January 3, 2021 13:41
-
-
Save ivopbernardo/fcda8df7e0d22446e30e28282518d9e7 to your computer and use it in GitHub Desktop.
cleaning FBI crime data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Loading readxl library | |
library(readxl) | |
#' Clean a raw FBI crime spreadsheet into an all-numeric table
#'
#' Reads the `.xls` file at `path`, promotes the third row of the sheet to
#' column names, keeps only the rows whose first cell parses as a number,
#' truncates `Year` to its first four characters, drops every column whose
#' name contains "rate", and coerces the remaining columns to numeric.
#'
#' @param path Path to the `.xls` file, passed to `read_xls()`.
#' @return A `data.frame` with one row per retained spreadsheet row. All
#'   columns are numeric, including the helper column `converted_index`
#'   (the numeric value of the first column, taken before `Year` is
#'   truncated). Column names go through `data.frame()`'s default name
#'   checking.
clean_crime_data <- function(path) {
  # Load the data
  crime_data <- read_xls(path)

  if (nrow(crime_data) < 3L) {
    stop("Expected the column headers in row 3 of the sheet, but it has ",
         nrow(crime_data), " row(s).", call. = FALSE)
  }

  # The third row holds the column headers. Pull it out as a plain
  # character vector instead of assigning a one-row data frame to colnames().
  header <- vapply(
    crime_data[3, , drop = FALSE],
    function(cell) as.character(cell[[1]]),
    character(1),
    USE.NAMES = FALSE
  )
  colnames(crime_data) <- header

  if (!"Year" %in% names(crime_data)) {
    stop("No `Year` column found in the header row (row 3).", call. = FALSE)
  }

  # Auxiliary column used to filter the rows: NA wherever the first cell is
  # not a number. The NA coercion is the point here, so its warning is
  # silenced for this single call only.
  crime_data[["converted_index"]] <- suppressWarnings(
    as.numeric(crime_data[[1]])
  )

  # Filter out the rows that do not interest us (non-numeric first cell)
  keep_row <- !is.na(crime_data[["converted_index"]])
  crime_data_filter <- crime_data[keep_row, , drop = FALSE]

  # Keep only the first four characters of the year
  # NOTE(review): presumably this strips trailing footnote markers - confirm
  crime_data_filter[["Year"]] <- substr(crime_data_filter[["Year"]], 1, 4)

  # Filter out columns that do not matter (any name containing "rate")
  is_rate <- grepl("rate", names(crime_data_filter))
  crime_data_filter <- crime_data_filter[, !is_rate, drop = FALSE]

  # Transform our table into numeric. lapply() always yields one list element
  # per column, so the result keeps its shape even when a single row (or no
  # row) survives the filter; sapply() would collapse that case to a vector.
  pipeline_table <- data.frame(lapply(crime_data_filter, as.numeric))

  # Return our pipeline_table
  pipeline_table
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment