Preliminaries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series

Get data into DataFrame

Instantiate an empty DataFrame

df = DataFrame()

Load a DataFrame from a CSV file

df = pd.read_csv('file.csv')# often works
df = pd.read_csv('file.csv', header=0,
    index_col=0, quotechar='"',sep=':',
    na_values = ['na', '-', '.', ''])

Get data from inline CSV text to a DataFrame

from io import StringIO
data = """, Animal, Cuteness, Desirable
row-1, dog, 8.7, True
row-2, cat, 9.5, True
row-3, bat, 2.6, False"""
df = pd.read_csv(StringIO(data),
header=0, index_col=0,
skipinitialspace=True)

Load DataFrames from a Microsoft Excel file

# Each Excel sheet in a Python dictionary
workbook = pd.ExcelFile('file.xlsx')
d = {} # start with an empty dictionary
for sheet_name in workbook.sheet_names:
df = workbook.parse(sheet_name)
d[sheet_name] = df

Load a DataFrame from a MySQL database

import pymysql
from sqlalchemy import create_engine
engine = create_engine('mysql+pymysql://'
+'USER:PASSWORD@HOST/DATABASE')
df = pd.read_sql_table('table', engine)

Data in Series then combine into a DataFrame

# Example 1 ...
s1 = Series(range(6))
s2 = s1 * s1
s2.index = s2.index + 2# misalign indexes
df = pd.concat([s1, s2], axis=1)
# Example 2 ...
s3 = Series({'Tom':1, 'Dick':4, 'Har':9})
s4 = Series({'Tom':3, 'Dick':2, 'Mar':5})
df = pd.concat({'A':s3, 'B':s4 }, axis=1)

Get a DataFrame from a Python dictionary

# default --- assume data is in columns
df = DataFrame({
'col0' : [1.0, 2.0, 3.0, 4.0],
'col1' : [100, 200, 300, 400]
})

Get a DataFrame from data in a Python dictionary

# --- use helper method for data in rows
df = DataFrame.from_dict({ # data by row
# rows as python dictionaries
'row0' : {'col0':0, 'col1':'A'},
'row1' : {'col0':1, 'col1':'B'}
}, orient='index')
df = DataFrame.from_dict({ # data by row
# rows as python lists
'row0' : [1, 1+1j, 'A'],
'row1' : [2, 2+2j, 'B']
}, orient='index')

Create play/fake data (useful for testing)

# --- simple - default integer indexes
df = DataFrame(np.random.rand(50,5))
# --- with a time-stamp row index:
df = DataFrame(np.random.rand(500,5))
df.index = pd.date_range('1/1/2005',
periods=len(df), freq='M')
# --- with alphabetic row and col indexes
# and a "groupable" variable
import string
import random
r = 52 # note: min r is 1; max r is 52
c = 5
df = DataFrame(np.random.randn(r, c),
columns = ['col'+str(i) for i in
range(c)],
index = list((string. ascii_uppercase+
string.ascii_lowercase)[0:r]))
df['group'] = list(
''.join(random.choice('abcde')
for _ in range(r)) )

Saving as DataFrame

Saving a DataFrame to a CSV file

df.to_csv('name.csv', encoding='utf-8')

Saving DataFrames to an Excel Workbook

from pandas import ExcelWriter
writer = ExcelWriter('filename.xlsx')
df1.to_excel(writer,'Sheet1')
df2.to_excel(writer,'Sheet2')
writer.save()

Saving a DataFrame to MySQL

import pymysql
from sqlalchemy import create_engine
e = create_engine('mysql+pymysql://' +
'USER:PASSWORD@HOST/DATABASE')
df.to_sql('TABLE',e, if_exists='replace')

Saving to Python objects

python d = df.to_dict() # to dictionary str = df.to_string() # to string m = df.as_matrix() # to numpy matrix

Working with the whole DataFrame

fabsta/Pandas cheat sheet.md

Preliminaries

Get data into DataFrame

Instantiate an empty DataFrame

Load a DataFrame from a CSV file

Get data from inline CSV text to a DataFrame

Load DataFrames from a Microsoft Excel file

Load a DataFrame from a MySQL database

Data in Series then combine into a DataFrame

Get a DataFrame from a Python dictionary

Get a DataFrame from data in a Python dictionary

Create play/fake data (useful for testing)

Saving as DataFrame

Saving a DataFrame to a CSV file

Saving DataFrames to an Excel Workbook

Saving a DataFrame to MySQL

Saving to Python objects

Working with the whole DataFrame

Working with Columns

Working with rows

Working with cells

qharlie commented Dec 7, 2016