Last active
August 29, 2015 13:58
-
-
Save doug-friedman/6d174b79e419bad61302 to your computer and use it in GitHub Desktop.
A quick R function to speed up reading in selected columns from a large csv file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#####################
### COLS FUNCTION ###
#####################
## Given the file, fields, and their classes, R will import only the relevant columns
#' Read only selected columns from a CSV file.
#'
#' The header is inspected first so that every unwanted column can be marked
#' "NULL" in `colClasses`, which makes `read.csv()` skip it while parsing.
#'
#' @param file Path to the CSV file. It is read twice: once to peek at the
#'   header and once to load the data.
#' @param fields Character vector of column names to keep, spelled the way
#'   `read.csv()` reports them (i.e. after its default name checking).
#' @param fields.class Character vector of column classes, parallel to
#'   `fields`. Entries that are not supplied fall back to `NA`, which lets
#'   `read.csv()` guess the type of that column.
#' @param nrows Maximum number of data rows to read; -1 reads them all.
#' @return A data.frame with the requested columns in file order, or the
#'   string "The specified fields are not present" if any field is missing
#'   from the header.
extractCols <- function(file = "", fields = c(), fields.class = c(),
                        nrows = -1) {
  # Peek at the header (plus one data row) to learn the column names.
  header.names <- names(read.csv(file, nrows = 1))

  # Every requested field must exist. The previous check compared
  # length(fields %in% ...) with length(fields), which is always TRUE, so
  # missing fields were silently ignored.
  if (!all(fields %in% header.names)) {
    return("The specified fields are not present")
  }

  # For each file column, find its position in `fields` (NA if not wanted).
  field.pos <- match(header.names, fields)
  keep <- !is.na(field.pos)

  # "NULL" tells read.csv to skip a column; wanted columns get their class.
  # Indexing past the end of fields.class yields NA (auto-detect).
  cols.want <- rep("NULL", length(header.names))
  cols.want[keep] <- as.character(fields.class)[field.pos[keep]]

  read.csv(file, colClasses = cols.want, nrows = nrows)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment