Created
March 26, 2018 14:09
-
-
Save broschke/6feb5ea7ba2623300ff24baf051cd622 to your computer and use it in GitHub Desktop.
Snippet to move Pandas dataframe to CSV then upload to Vertica
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import os | |
import pyodbc | |
import time | |
#this is a sample dataframe created for testing | |
df = pd.DataFrame(np.random.randint(0,100,size=(100, 4)), columns=list('ABCD')) | |
#create a list of the column names | |
col_list = list(df) | |
#give the dataframe a name, the name that you'd like your vertica table to be | |
df.name = 'random' | |
#write dataframe to csv file | |
df.to_csv('df.csv',index=False) | |
#grab the directory location of the file | |
data_location = os.path.realpath('df.csv') | |
#if you have a large dataframe, increase the sleep seconds so the program waits sufficient time for the csv to be written | |
time.sleep(10) | |
def create_table(cols): | |
'''This function writes the column names into the structure needed for a create table sql statement. | |
Note all columns are set to varchar(32). Casting will likely be needed in your sql statements later.''' | |
for i, val in enumerate(cols): | |
if (i+1) == len(cols): | |
yield val + " VARCHAR(32)" | |
else: | |
yield val + " VARCHAR(32)," | |
#call function on col_list | |
table_construct = [field for field in create_table(col_list)] | |
#create and close an empty txt file as the reject file for copy function | |
with open(os.path.join('reject.txt'), 'w'): | |
pass | |
#grab exceptions location | |
exceptions_location = os.path.realpath('reject.txt') | |
#build sql statement | |
vertica_table = 'dq.br_'+ df.name #adjust the schema as needed | |
begin_create = 'create table {} ('.format(vertica_table) | |
end_create = ');' | |
copy_statement = 'COPY {} FROM local \'{}\' EXCEPTIONS \'{}\' DELIMITER \',\';'.format(vertica_table,data_location,exceptions_location) | |
sql = begin_create + ' '.join(table_construct) + end_create + copy_statement | |
#connect to vertica and excecute sql | |
conn=pyodbc.connect(dsn='VerticaBR') | |
cur=conn.cursor() | |
cur.execute('drop table if exists {};'.format(vertica_table)) | |
cur.execute(sql) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment