Created
April 18, 2017 23:23
-
-
Save abehmiel/c62d3064ccc3704bb01cea850079110d to your computer and use it in GitHub Desktop.
Useful Pandas csv import functions. Original by Chris Albon
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Thanks to Chris Albon. Shamelessly lifted from: https://chrisalbon.com/python/pandas_dataframe_importing_csv.html
import os

import numpy as np
import pandas as pd
# Location of the example CSV, relative to the current working directory.
CSV_PATH = '../data/example.csv'

# Display names used whenever the file's own header row is overridden.
# 'UID' labels the unnamed leading column that to_csv() writes for the index.
COLUMN_NAMES = ['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score']

# Create dataframe (that we will be importing).
# The "." placeholders and the "25,000"-style strings are what the
# na_values= and thousands= examples further down operate on.
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'last_name': ['Miller', 'Jacobson', ".", 'Milner', 'Cooze'],
    'age': [42, 52, 36, 24, 73],
    'preTestScore': [4, 24, 31, ".", "."],
    'postTestScore': ["25,000", "94,000", 57, 62, 70],
}
df = pd.DataFrame(raw_data, columns=['first_name', 'last_name', 'age', 'preTestScore', 'postTestScore'])

# Save dataframe as csv in ../data (relative to the working directory).
# The directory is created first because to_csv() fails if it is missing.
os.makedirs(os.path.dirname(CSV_PATH), exist_ok=True)
df.to_csv(CSV_PATH)

# Load a csv
df = pd.read_csv(CSV_PATH)

# Load a csv with no headers.
# NOTE: example.csv does have a header line, so here it is read as the first
# data row and the columns are labelled 0..5.
df = pd.read_csv(CSV_PATH, header=None)

# Load a csv while specifying column names.
# header=0 marks the file's first line as a header to be replaced by `names`;
# without it that line would be parsed as a data record.
df = pd.read_csv(CSV_PATH, header=0, names=COLUMN_NAMES)

# Load a csv with setting the index column to UID
df = pd.read_csv(CSV_PATH, header=0, index_col='UID', names=COLUMN_NAMES)

# Load a csv while setting the index columns to First Name and Last Name
df = pd.read_csv(CSV_PATH, header=0, index_col=['First Name', 'Last Name'], names=COLUMN_NAMES)

# Load a csv while specifying "." as missing values
df = pd.read_csv(CSV_PATH, na_values=['.'])

# Load a csv while specifying "." and "NA" as missing values in the Last Name
# column and "." as missing values in the Pre-Test Score column.
# The dict keys must match the column labels of the resulting frame, so the
# columns are renamed in the same call (header=0 + names).
sentinels = {'Last Name': ['.', 'NA'], 'Pre-Test Score': ['.']}
df = pd.read_csv(CSV_PATH, header=0, names=COLUMN_NAMES, na_values=sentinels)

# Load a csv while skipping the top 3 rows.
# The skipped lines include the header line, so header=None + names keeps the
# first remaining record from being consumed as column labels.
df = pd.read_csv(CSV_PATH, na_values=sentinels, skiprows=3, header=None, names=COLUMN_NAMES)

# Load a csv while interpreting "," in strings around numbers as thousands separators
df = pd.read_csv(CSV_PATH, thousands=',')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment