Created
November 20, 2017 06:10
-
-
Save jonocarroll/b17ce021b0637a31f584ed08a1fbe733 to your computer and use it in GitHub Desktop.
Read a transposed (variables in rows) CSV file into R correctly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## Based on | |
| ## https://stackoverflow.com/a/17289991/4168169 | |
## Read a transposed CSV file (one variable per row) into a data frame.
##
## The file is read line by line, every line is split on `sep`, the fields
## are re-assembled column-wise into ordinary CSV text, and that text is
## handed to read.csv() so its usual type conversion applies.
##
## Arguments:
##   file    Path of the transposed file. It is read twice (once by
##           readLines(), once by count.fields()).
##   header  Forwarded to read.csv(); with TRUE the first field of every
##           input line becomes the column name.
##   sep     Field separator, matched literally (not as a regular
##           expression).
##   ...     Further arguments forwarded to read.csv(), e.g.
##           stringsAsFactors. `na.strings` is already set to "" below, so
##           it cannot be supplied again through `...`.
##
## Returns the data frame built by read.csv(); empty fields become NA.
## Stops with an error when the file has no non-empty line.
##
## Limitation: lines are split with strsplit(), so a quoted field that
## itself contains `sep` is not kept together.
read.tcsv <- function(file, header = TRUE, sep = ",", ...) {
  lines <- readLines(file)
  ## A blank line carries no variable; count.fields() skips such lines by
  ## default, so drop them here too instead of emitting an all-missing
  ## column.
  lines <- lines[nzchar(lines)]
  if (length(lines) == 0L) {
    stop("'file' contains no non-empty lines", call. = FALSE)
  }

  ## The widest input line decides how many rows the result has.
  n <- max(count.fields(file, sep = sep), na.rm = TRUE)

  split_line <- function(line) {
    ## fixed = TRUE keeps separators such as "|" or "." from being
    ## interpreted as regular expressions.
    fields <- strsplit(line, split = sep, fixed = TRUE)[[1L]]
    ## Resize to exactly n fields (strsplit() drops a trailing empty
    ## field, and short lines have fewer fields to begin with).
    length(fields) <- n
    ## Pad with "" rather than NA: paste() would turn NA into the literal
    ## text "NA", which read.csv() below does NOT treat as missing because
    ## na.strings is "".
    fields[is.na(fields)] <- ""
    fields
  }

  ## One column per input line, one row per field position.
  cells <- matrix(unlist(lapply(lines, split_line)), nrow = n)
  csv_text <- apply(cells, 1L, paste, collapse = sep)

  ## Empty strings are converted to NA.
  read.csv(text = csv_text, sep = sep, header = header, na.strings = "", ...)
}
## Demo: "tdata.csv" is stored transposed, i.e. every line of the file is
## one variable, so the file's rows should end up as data-frame columns.
cat(readLines(con = "tdata.csv"), sep = "\n")
#> var1,1,2,3,4
#> var2,0.8354,0.8359,0.5590,0.8812
#> var3,apple,banana,,orange

## read.tcsv() reads the file and transposes it in one step.
tdata <- read.tcsv("tdata.csv")
print(tdata)
#>   var1   var2   var3
#> 1    1 0.8354  apple
#> 2    2 0.8359 banana
#> 3    3 0.5590   <NA>
#> 4    4 0.8812 orange

## Column types are inferred as usual.
## NOTE(review): the Factor column below presumably reflects an R version
## older than 4.0.0, where stringsAsFactors defaulted to TRUE; newer
## versions should show chr here -- confirm.
str(object = tdata)
#> 'data.frame': 4 obs. of 3 variables:
#> $ var1: int 1 2 3 4
#> $ var2: num 0.835 0.836 0.559 0.881
#> $ var3: Factor w/ 3 levels "apple","banana",..: 1 2 NA 3

## Extra arguments are forwarded, so factors can be switched off explicitly.
tdata <- read.tcsv("tdata.csv", stringsAsFactors = FALSE)
print(tdata)
#>   var1   var2   var3
#> 1    1 0.8354  apple
#> 2    2 0.8359 banana
#> 3    3 0.5590   <NA>
#> 4    4 0.8812 orange

str(object = tdata)
#> 'data.frame': 4 obs. of 3 variables:
#> $ var1: int 1 2 3 4
#> $ var2: num 0.835 0.836 0.559 0.881
#> $ var3: chr "apple" "banana" NA "orange"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment