Created
January 16, 2014 05:35
-
-
Save EarlGlynn/8450305 to your computer and use it in GitHub Desktop.
read.csv / write.csv examples for KC R Users group, 2014-01-18
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# read.csv / write.csv examples described here:
# http://earlglynn.github.io/R/input-output/readcsv-writecsv/index.html
#setwd("") # Set working directory if necessary
# Examples
# 1. Read Sample.csv input file with read.csv.
# 2. Explain some details of data.frame
# 3. Write Sample-Copy.csv output file with write.csv.
# 4. Compare Sample.csv to Sample-Copy.csv.
# efg, 2014-01-15
############################################################
### Read csv file into data.frame

# Read the sample file into a data.frame and inspect its basic properties.
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 changed the
# read.csv() default to FALSE; the factor examples later in this script
# (as.integer(rawdata$Code)) need string columns to be read as factors.
rawdata <- read.csv("Sample.csv", stringsAsFactors = TRUE)
rawdata             # auto-print the whole data.frame

dim(rawdata)        # dimensions
nrow(rawdata)       # number of rows
ncol(rawdata)       # number of columns
names(rawdata)      # column names
row.names(rawdata)  # row names
str(rawdata)        # structure
############################################################
### Addressing elements of data.frame

# Column of data
rawdata$Grade
rawdata$Age
# "typo here!": presumably Sample.csv has no "Score" column (no other
# statement in this script uses one); in that case `$` quietly returns
# NULL instead of raising an error.
rawdata$Score       # typo here!

# Indexing like matrix: [rows, columns]
rawdata[2, 4]               # single element: row 2, column 4
rawdata[2, ]                # row 2
rawdata[, 4]                # column 4
rawdata[1:2, c(1, 3, 5)]    # first 2 rows of columns 1, 3 and 5

### Lookup: associative memory, hash, dictionary

# Single element
rawdata["2", "Amount"]      # row name "2", "Amount" column

# Rows with names "2" and "1", columns with names
# "Start", "Code" and "Age"
rawdata[c("2", "1"), c("Start", "Code", "Age")]
############################################################
### Suppress factors: stringsAsFactors=FALSE

# "Factors" are stored as integers internally
# NOTE: these three lines only illustrate that if rawdata$Code is a factor.
# Under R >= 4.0.0 read.csv() no longer creates factors by default, so the
# read.csv() call that produced rawdata must pass stringsAsFactors = TRUE.
rawdata$Code                  # prints the values (and levels, if a factor)
as.integer(rawdata$Code)      # underlying integer codes of the factor
as.character(rawdata$Code)    # level labels as plain strings

# Re-read using stringsAsFactors=FALSE
rawdata <- read.csv("Sample.csv", stringsAsFactors = FALSE)
rawdata
str(rawdata)                  # string columns now show as chr
############################################################
### Control data types by column: colClasses

# One class name per column, in file order.
rawdata <- read.csv(
  "Sample.csv",
  colClasses = c("factor", "integer", "character", "numeric", "Date")
)
str(rawdata)

# Data type suggestions:
# * Use "factor" for small, well-defined list of discrete values.
# * Use "character" for strings that may need to be
#   edited/modified/searched.
# * In vectors integers take 4 bytes, numeric doubles take 8 bytes.
#   E.g., for N <- 1000000L,
#     object.size(rep(0L,N))  is 4000000 + overhead,
#     object.size(rep(0.0,N)) is 8000000 + overhead,
#   where overhead is 24 bytes in 32-bit R and 40 bytes in 64-bit R
#   (figures as originally recorded in 2014; the exact header size may
#   differ between R versions).
# * The "best" format for Dates is ISO 8601.
############################################################
### Selected rows and columns

# Read only the first 2 data rows and only columns 2 and 4.
# * nrows is spelled out in full; the previous "nrow" only worked through
#   partial argument matching inside read.table().
# * A colClasses entry of "NULL" tells read.csv() to skip that column.
selected <- read.csv(
  "Sample.csv",
  header = TRUE,
  nrows = 2,
  colClasses = c("NULL", "integer", "NULL", "numeric", "NULL")
)
selected
############################################################
### write.csv

# Default write.csv
write.csv(rawdata)                                  # echo to screen to review
write.csv(rawdata, file = "Sample-Copy-Default.csv")

# Drop row names.
write.csv(rawdata, row.names = FALSE)               # echo to screen
write.csv(rawdata, file = "Sample-Copy-No-RowNames.csv", row.names = FALSE)

# Exactly the same as original file (for this case):
# no row names, no quoting of strings, and NA written as an empty field.
write.csv(rawdata, row.names = FALSE, quote = FALSE, na = "")  # echo to screen
write.csv(rawdata, file = "Sample-Copy-Exact.csv",
          row.names = FALSE, quote = FALSE, na = "")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment