EarlGlynn · January 16, 2014 05:35
diff --git a/R-IO-readcsv-writecsv.R b/R-IO-readcsv-writecsv.R
 # read.csv / write.csv examples described here:
 # http://earlglynn.github.io/R/input-output/readcsv-writecsv/index.html

 #setwd("")   # Set working directory if necessary

 # Examples
 # 1. Read Sample.csv input file with read.csv.
 # 2. Explain some details of data.frame
 # 3. Write Sample-Copy.csv output file with write.csv.
 # 4. Compare Sample.csv to Sample-Copy.csv.

 # efg, 2014-01-15

 ############################################################
 ### Read csv file into data.frame

 # stringsAsFactors is given explicitly: R >= 4.0.0 changed the default
 # to FALSE, but the factor examples later in this script
 # (e.g. as.integer(rawdata$Code)) expect string columns read as factors.
 rawdata <- read.csv("Sample.csv", stringsAsFactors = TRUE)
 rawdata

 dim(rawdata)        # dimensions: c(rows, columns)
 nrow(rawdata)       # number of rows
 ncol(rawdata)       # number of columns

 names(rawdata)      # column names
 row.names(rawdata)  # row names

 str(rawdata)        # structure: class and leading values of each column

 ############################################################
 ### Addressing elements of data.frame

 # Whole columns, extracted by name with `$`
 rawdata$Grade
 rawdata$Age
 rawdata$Score  # typo here!  `$` yields NULL if no column matches

 # Matrix-style indexing:  [rows, columns]
 rawdata[2, 4]  # one cell: row 2, column 4
 rawdata[2, ]   # all of row 2
 rawdata[, 4]   # all of column 4

 rawdata[1:2, c(1, 3, 5)]  # rows 1-2 of columns 1, 3 and 5

 ### Lookup by name:  associative memory, hash, dictionary

 # One cell
 rawdata["2", "Amount"]  # row named "2", column "Amount"

 # Rows named "2" and "1" (in that order), restricted to the
 # "Start", "Code" and "Age" columns
 rawdata[c("2", "1"), c("Start", "Code", "Age")]

 ############################################################
 ### Suppress factors:  stringsAsFactors=FALSE

 # A factor stores integer codes internally, plus a table of labels.
 # When Code is a factor, as.integer() exposes those codes and
 # as.character() gives back the labels.
 rawdata$Code
 as.integer(rawdata$Code)
 as.character(rawdata$Code)

 # Read the file again, keeping strings as plain character vectors
 rawdata <- read.csv("Sample.csv", stringsAsFactors = FALSE)
 rawdata
 str(rawdata)

 ############################################################
 ### Control data types by column:  colClasses

 # One class per column, applied in file order
 rawdata <- read.csv(
   "Sample.csv",
   colClasses = c("factor", "integer", "character", "numeric", "Date")
 )
 str(rawdata)

 # Data type suggestions:
 # * "factor":    a small, well-defined list of discrete values.
 # * "character": strings that may need to be
 #     edited/modified/searched.
 # * In vectors, integers take 4 bytes and numeric doubles take 8 bytes.
 #   E.g., for N <- 1000000L,
 #     object.size(rep(0L,N))  is 4000000 + overhead,
 #     object.size(rep(0.0,N)) is 8000000 + overhead,
 #     where overhead is 24 bytes in 32-bit R and 40 bytes in 64-bit R.
 # * The "best" format for Dates is ISO 8601.

 ############################################################
 ### Selected rows and columns

 # Read only the first 2 data rows, and only columns 2 and 4:
 # a colClasses entry of "NULL" makes read.csv skip that column.
 # The row limit is spelled `nrows` in full; the shorter `nrow` only
 # worked through partial argument matching.
 selected <- read.csv("Sample.csv", nrows = 2, header = TRUE,
                      colClasses = c("NULL", "integer", "NULL",
                                     "numeric", "NULL"))
 selected

 ############################################################
 ### write.csv

 # With the default settings
 write.csv(rawdata)  # no file given: output goes to the console
 write.csv(rawdata, "Sample-Copy-Default.csv")

 # Without the row-name column
 write.csv(rawdata, row.names = FALSE)  # console preview
 write.csv(rawdata, "Sample-Copy-No-RowNames.csv", row.names = FALSE)

 # Exactly the same as original file (for this case):
 # no row names, no quoting, NA written as an empty field
 write.csv(rawdata, row.names = FALSE, quote = FALSE, na = "")  # preview
 write.csv(rawdata, "Sample-Copy-Exact.csv",
           row.names = FALSE, quote = FALSE, na = "")
	# read.csv / write.csv examples described here:
	# http://earlglynn.github.io/R/input-output/readcsv-writecsv/index.html

	#setwd("") # Set working directory if necessary

	# Examples
	# 1. Read Sample.csv input file with read.csv.
	# 2. Explain some details of data.frame
	# 3. Write Sample-Copy.csv output file with write.csv.
	# 4. Compare Sample.csv to Sample-Copy.csv.

	# efg, 2014-01-15

	############################################################
	### Read csv file into data.frame

	# stringsAsFactors is given explicitly: R >= 4.0.0 changed the default
	# to FALSE, but the factor examples later in this script
	# (e.g. as.integer(rawdata$Code)) expect string columns read as factors.
	rawdata <- read.csv("Sample.csv", stringsAsFactors = TRUE)
	rawdata

	dim(rawdata)        # dimensions: c(rows, columns)
	nrow(rawdata)       # number of rows
	ncol(rawdata)       # number of columns

	names(rawdata)      # column names
	row.names(rawdata)  # row names

	str(rawdata)        # structure: class and leading values of each column

	############################################################
	### Addressing elements of data.frame

	# Whole columns, extracted by name with `$`
	rawdata$Grade
	rawdata$Age
	rawdata$Score  # typo here!  `$` yields NULL if no column matches

	# Matrix-style indexing:  [rows, columns]
	rawdata[2, 4]  # one cell: row 2, column 4
	rawdata[2, ]   # all of row 2
	rawdata[, 4]   # all of column 4

	rawdata[1:2, c(1, 3, 5)]  # rows 1-2 of columns 1, 3 and 5

	### Lookup by name:  associative memory, hash, dictionary

	# One cell
	rawdata["2", "Amount"]  # row named "2", column "Amount"

	# Rows named "2" and "1" (in that order), restricted to the
	# "Start", "Code" and "Age" columns
	rawdata[c("2", "1"), c("Start", "Code", "Age")]

	############################################################
	### Suppress factors:  stringsAsFactors=FALSE

	# A factor stores integer codes internally, plus a table of labels.
	# When Code is a factor, as.integer() exposes those codes and
	# as.character() gives back the labels.
	rawdata$Code
	as.integer(rawdata$Code)
	as.character(rawdata$Code)

	# Read the file again, keeping strings as plain character vectors
	rawdata <- read.csv("Sample.csv", stringsAsFactors = FALSE)
	rawdata
	str(rawdata)

	############################################################
	### Control data types by column:  colClasses

	# One class per column, applied in file order
	rawdata <- read.csv(
	  "Sample.csv",
	  colClasses = c("factor", "integer", "character", "numeric", "Date")
	)
	str(rawdata)

	# Data type suggestions:
	# * "factor":    a small, well-defined list of discrete values.
	# * "character": strings that may need to be
	#     edited/modified/searched.
	# * In vectors, integers take 4 bytes and numeric doubles take 8 bytes.
	#   E.g., for N <- 1000000L,
	#     object.size(rep(0L,N))  is 4000000 + overhead,
	#     object.size(rep(0.0,N)) is 8000000 + overhead,
	#     where overhead is 24 bytes in 32-bit R and 40 bytes in 64-bit R.
	# * The "best" format for Dates is ISO 8601.

	############################################################
	### Selected rows and columns

	# Read only the first 2 data rows, and only columns 2 and 4:
	# a colClasses entry of "NULL" makes read.csv skip that column.
	# The row limit is spelled `nrows` in full; the shorter `nrow` only
	# worked through partial argument matching.
	selected <- read.csv("Sample.csv", nrows = 2, header = TRUE,
	                     colClasses = c("NULL", "integer", "NULL",
	                                    "numeric", "NULL"))
	selected

	############################################################
	### write.csv

	# With the default settings
	write.csv(rawdata)  # no file given: output goes to the console
	write.csv(rawdata, "Sample-Copy-Default.csv")

	# Without the row-name column
	write.csv(rawdata, row.names = FALSE)  # console preview
	write.csv(rawdata, "Sample-Copy-No-RowNames.csv", row.names = FALSE)

	# Exactly the same as original file (for this case):
	# no row names, no quoting, NA written as an empty field
	write.csv(rawdata, row.names = FALSE, quote = FALSE, na = "")  # preview
	write.csv(rawdata, "Sample-Copy-Exact.csv",
	          row.names = FALSE, quote = FALSE, na = "")
No results found