import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
# Start from an empty frame.
df = pd.DataFrame()

# Simplest call: the defaults often work.
df = pd.read_csv('file.csv')

# Fully spelled-out call: colon-separated fields, double-quote quoting,
# first row as header, first column as row index, and a custom list of
# markers that are read as missing values.
df = pd.read_csv(
    'file.csv',
    sep=':',
    quotechar='"',
    header=0,
    index_col=0,
    na_values=['na', '-', '.', ''],
)
from io import StringIO
# CSV text held in memory; the empty first header cell labels the index column.
data = """, Animal, Cuteness, Desirable
row-1, dog, 8.7, True
row-2, cat, 9.5, True
row-3, bat, 2.6, False"""

# Wrap the text in a file-like object so read_csv can parse it.
# skipinitialspace drops the blank that follows each comma.
df = pd.read_csv(
    StringIO(data),
    skipinitialspace=True,
    index_col=0,
    header=0,
)
# Each Excel sheet in a Python dictionary, keyed by sheet name.
d = {} # start with an empty dictionary
# The with-block closes the workbook's file handle once all sheets are parsed.
with pd.ExcelFile('file.xlsx') as workbook:
    for sheet_name in workbook.sheet_names:
        df = workbook.parse(sheet_name)  # one DataFrame per sheet
        d[sheet_name] = df
import pymysql
from sqlalchemy import create_engine
# Connection URL is assembled from two adjacent literals:
# the dialect+driver prefix, then the credentials/host/database part.
engine = create_engine(
    'mysql+pymysql://'
    'USER:PASSWORD@HOST/DATABASE'
)
# Load the whole table into a DataFrame through that engine.
df = pd.read_sql_table('table', con=engine)
# Example 1: two Series whose integer indexes only partly overlap.
s1 = pd.Series(range(6))
s2 = s1.mul(s1)  # element-wise squares of s1
s2.index = s2.index + 2  # shift the labels so s1 and s2 are misaligned
# Column-wise concat aligns on the union of labels; gaps become NaN.
df = pd.concat([s1, s2], axis='columns')
# Example 2: two Series with string labels that only partly match.
s3 = Series(dict(Tom=1, Dick=4, Har=9))
s4 = Series(dict(Tom=3, Dick=2, Mar=5))
# Side-by-side concat; the keys become the column names.
df = pd.concat([s3, s4], axis=1, keys=['A', 'B'])
# Default constructor: each dictionary key is a column label and
# each list holds that column's values.
df = pd.DataFrame(
    data=dict(
        col0=[1.0, 2.0, 3.0, 4.0],
        col1=[100, 200, 300, 400],
    )
)
# Row-oriented input: outer keys are row labels, and each inner
# dictionary maps column label -> value for that row.
df = pd.DataFrame.from_dict(
    data={
        'row0': dict(col0=0, col1='A'),
        'row1': dict(col0=1, col1='B'),
    },
    orient='index',
)
# Row-oriented input again, this time with each row given as a plain
# list; no column labels are supplied here.
df = pd.DataFrame.from_dict(
    data={
        'row0': [1, complex(1, 1), 'A'],
        'row1': [2, complex(2, 2), 'B'],
    },
    orient='index',
)
# Simplest fake data: 50 rows by 5 columns of random floats, with
# default integer labels on both axes.
df = pd.DataFrame(data=np.random.rand(50, 5))
# --- with a time-stamp row index:
df = DataFrame(np.random.rand(500, 5))
# One month-end stamp per row, starting from January 2005.
# The offset object replaces the 'M' string alias: pandas 2.2 deprecated
# that alias in favour of 'ME', whereas pd.offsets.MonthEnd() is accepted
# by both older and newer releases.
df.index = pd.date_range('1/1/2005',
                         periods=len(df),
                         freq=pd.offsets.MonthEnd())
# --- with alphabetic row and col indexes
# and a "groupable" variable
import string
import random
r = 52  # row count: at least 1, at most 52 (one ASCII letter per row)
c = 5   # number of random numeric columns

# Normally distributed values; columns are 'col0'..'col<c-1>' and the
# rows are labelled with the first r letters of A-Z followed by a-z.
df = DataFrame(
    np.random.randn(r, c),
    columns=['col{}'.format(i) for i in range(c)],
    index=list((string.ascii_uppercase + string.ascii_lowercase)[:r]),
)

# A categorical column to group on: one random letter from 'abcde' per row.
df['group'] = [random.choice('abcde') for _ in range(r)]
# Write the frame to a UTF-8 encoded CSV file.
df.to_csv(path_or_buf='name.csv', encoding='utf-8')
from pandas import ExcelWriter
# Write two frames to separate sheets of one workbook.
# The with-block saves and closes the file on exit; the older explicit
# writer.save() call was removed in pandas 2.0.
with ExcelWriter('filename.xlsx') as writer:
    # sheet_name is passed by keyword: positional use is deprecated.
    df1.to_excel(writer, sheet_name='Sheet1')
    df2.to_excel(writer, sheet_name='Sheet2')
import pymysql
from sqlalchemy import create_engine
# Connection URL is assembled from two adjacent literals:
# the dialect+driver prefix, then the credentials/host/database part.
e = create_engine(
    'mysql+pymysql://'
    'USER:PASSWORD@HOST/DATABASE'
)
# Store the frame as a table; if_exists='replace' overwrites an
# existing table of the same name.
df.to_sql(name='TABLE', con=e, if_exists='replace')
# Convert a DataFrame to other Python objects.
d = df.to_dict()  # to dictionary
# Bound to `text` rather than `str` so the builtin str() is not shadowed.
text = df.to_string()  # to string
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
m = df.to_numpy()  # to numpy array
Keep going, then I can turn this into a PDF and print it :) 👍