Skip to content

Instantly share code, notes, and snippets.

Created May 12, 2011 13:00
Show Gist options
  • Save anonymous/968442 to your computer and use it in GitHub Desktop.
Save anonymous/968442 to your computer and use it in GitHub Desktop.
Function to get problematic data into PostgreSQL from R (uses RPostgreSQL)
# Function to get data over to PostgreSQL quickly and with minimal loss
# of information due to data type conversions.
# Note that I have had cases where the pipe() call returned an error due
# to lack of memory. So care should be taken with this function
dbWriteTable.fast <- function (conn,name,value,row.names=FALSE,
overwrite=TRUE,fix.names=TRUE) {
# Conform names to PostgreSQL requirements
pg.names <- make.db.names(conn,names(value))
# Convert variable names to lower case, if requested to do so
if (fix.names) {
pg.names <- tolower(pg.names)
}
# Drop the table
if (overwrite) {
dbGetQuery(conn,paste("DROP TABLE IF EXISTS",name))
}
# Create the table (again?)
if (FALSE) { # Can't do TEMP TABLEs with handoff to psql
sql <- paste("CREATE TEMP TABLE",name)
} else {
sql <- paste("CREATE TABLE",name)
}
# Allow for row names, if necessary
if (row.names) {
sql <- paste(sql, "(row_names text, ")
} else {
sql <- paste(sql, "(")
}
# Determine the appropriate types of the various variables based on
# R type/class and characteristics of the data.
# Support for boolean, Date, bigint, double precision,
# varchar, character, and text
pg.type <- NA
length(pg.type) <- length <- dim(value)[2]
for (i in 1:length) {
if (is.logical(value[,i])) {
pg.type <- "boolean"
} else if (inherits(value[,i],"Date")) {
pg.type <- "date"
} else if (is.integer(value[,i])) {
pg.type <- "bigint"
} else if (is.double(value[,i])) {
pg.type <- "double precision"
} else {
temp <- as.character(value[,i])
max.char <- max(nchar(temp))
min.char <- min(nchar(temp))
if (max.char >= 255) {
pg.type <- "text"
} else if (min.char==max.char) {
pg.type <- paste("character(",max.char,")",sep="")
} else {
pg.type <- paste("varchar(",max.char,")",sep="")
}
}
# Add SQL for variable to the existing string
sql <- paste(sql,pg.names[i]," ",pg.type,sep="")
if (i < length) {
sql <- paste(sql,", ",sep="")
} else {
sql <- paste(sql,")",sep="")
}
}
# Create the table
dbGetQuery(conn,sql)
dbGetQuery(conn,paste("ANALYZE",name))
#########
info <- dbGetInfo(conn)
if (info$host=="") info$host <- "localhost"
result <- write.csv(value, file=pipe(paste(
" psql -h ",info$host," -U ",info$user," -d ",info$dbname," -c \"COPY ",
name," FROM STDIN CSV HEADER\"",sep=""))
,na="", row.names=row.names) #
#########
# Return TRUE (doesn't help much)
return(TRUE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment