Created
May 12, 2011 13:00
-
-
Save anonymous/968442 to your computer and use it in GitHub Desktop.
Function to get problematic data into PostgreSQL from R (uses RPostgreSQL)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to get data over to PostgreSQL quickly and with minimal loss | |
# of information due to data type conversions. | |
# Note that I have had cases where the pipe() call returned an error due | |
# to lack of memory. So care should be taken with this function | |
dbWriteTable.fast <- function (conn,name,value,row.names=FALSE, | |
overwrite=TRUE,fix.names=TRUE) { | |
# Conform names to PostgreSQL requirements | |
pg.names <- make.db.names(conn,names(value)) | |
# Convert variable names to lower case, if requested to do so | |
if (fix.names) { | |
pg.names <- tolower(pg.names) | |
} | |
# Drop the table | |
if (overwrite) { | |
dbGetQuery(conn,paste("DROP TABLE IF EXISTS",name)) | |
} | |
# Create the table (again?) | |
if (FALSE) { # Can't do TEMP TABLEs with handoff to psql | |
sql <- paste("CREATE TEMP TABLE",name) | |
} else { | |
sql <- paste("CREATE TABLE",name) | |
} | |
# Allow for row names, if necessary | |
if (row.names) { | |
sql <- paste(sql, "(row_names text, ") | |
} else { | |
sql <- paste(sql, "(") | |
} | |
# Determine the appropriate types of the various variables based on | |
# R type/class and characteristics of the data. | |
# Support for boolean, Date, bigint, double precision, | |
# varchar, character, and text | |
pg.type <- NA | |
length(pg.type) <- length <- dim(value)[2] | |
for (i in 1:length) { | |
if (is.logical(value[,i])) { | |
pg.type <- "boolean" | |
} else if (inherits(value[,i],"Date")) { | |
pg.type <- "date" | |
} else if (is.integer(value[,i])) { | |
pg.type <- "bigint" | |
} else if (is.double(value[,i])) { | |
pg.type <- "double precision" | |
} else { | |
temp <- as.character(value[,i]) | |
max.char <- max(nchar(temp)) | |
min.char <- min(nchar(temp)) | |
if (max.char >= 255) { | |
pg.type <- "text" | |
} else if (min.char==max.char) { | |
pg.type <- paste("character(",max.char,")",sep="") | |
} else { | |
pg.type <- paste("varchar(",max.char,")",sep="") | |
} | |
} | |
# Add SQL for variable to the existing string | |
sql <- paste(sql,pg.names[i]," ",pg.type,sep="") | |
if (i < length) { | |
sql <- paste(sql,", ",sep="") | |
} else { | |
sql <- paste(sql,")",sep="") | |
} | |
} | |
# Create the table | |
dbGetQuery(conn,sql) | |
dbGetQuery(conn,paste("ANALYZE",name)) | |
######### | |
info <- dbGetInfo(conn) | |
if (info$host=="") info$host <- "localhost" | |
result <- write.csv(value, file=pipe(paste( | |
" psql -h ",info$host," -U ",info$user," -d ",info$dbname," -c \"COPY ", | |
name," FROM STDIN CSV HEADER\"",sep="")) | |
,na="", row.names=row.names) # | |
######### | |
# Return TRUE (doesn't help much) | |
return(TRUE) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment