Skip to content

Instantly share code, notes, and snippets.

@marcoemorais
Last active January 25, 2017 14:52
Show Gist options
  • Save marcoemorais/03dff1a06f49da16a3a0e0b245e5a692 to your computer and use it in GitHub Desktop.
Save marcoemorais/03dff1a06f49da16a3a0e0b245e5a692 to your computer and use it in GitHub Desktop.
Marco's pandas examples with DataFrame
#!/usr/bin/env python
import unittest
import pandas as pd
class AllTest(unittest.TestCase):
    """Example: ``Series.isin(...).all()`` is true only if every row matches."""

    def test_all(self):
        """Check ``all()`` against a partial and a complete list of states."""
        # Ids use the 'wf1'..'wf3' pattern (the first id was previously 'wf').
        wf_data = {'id': ['wf1', 'wf2', 'wf3'],
                   'states': ['s1', 's2', 's3']}
        wf_df = pd.DataFrame(wf_data)

        # Only 2 of the 3 states are listed, so not every row matches.
        self.assertFalse(wf_df['states'].isin(['s1', 's3']).all())

        # All 3 states are listed, so every row matches.
        self.assertTrue(wf_df['states'].isin(['s1', 's2', 's3']).all())


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class ApplyTest(unittest.TestCase):
    """Example: row-wise ``DataFrame.apply`` producing one string per row."""

    def test_apply(self):
        """Join the ``id`` and ``states`` cells of each row with a comma."""

        def join_row(row):
            # Called once per row because apply() is invoked with axis=1.
            return '%s,%s' % (row['id'], row['states'])

        frame = pd.DataFrame({'id': ['wf1', 'wf2', 'wf3'],
                              'states': ['s1', 's2', 's3']})
        joined = frame.apply(join_row, axis=1)

        expected = ['wf1,s1', 'wf2,s2', 'wf3,s3']
        # Both sides are sorted so the comparison ignores row order.
        self.assertEqual(sorted(expected), sorted(joined.tolist()))


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import itertools
import unittest
import pandas as pd
class CrossProductTest(unittest.TestCase):
    """Example: build the cross product of two DataFrame columns."""

    def test_cross_product(self):
        """Pair every ``aoi`` with every ``scene`` and check the result."""
        aoi_data = {'aoi': ['aoi1', 'aoi2', 'aoi3'],
                    'wkt': ['pt1', 'pt2', 'pt3']}
        aoi_df = pd.DataFrame(aoi_data)
        num_aoi = aoi_df.shape[0]

        scene_data = {'scene': ['s1', 's2', 's3']}
        scene_df = pd.DataFrame(scene_data)
        num_scene = scene_df.shape[0]

        # itertools.product already yields (aoi, scene) pairs, so the pairs
        # can be handed to DataFrame directly without re-wrapping each one.
        pairs = list(itertools.product(aoi_df['aoi'], scene_df['scene']))
        df = pd.DataFrame(pairs, columns=['aoi', 'scene'])

        # One row per (aoi, scene) combination.
        self.assertEqual(num_aoi * num_scene, df.shape[0])

        # Select rows and column in a single .loc call rather than chaining
        # two indexing operations.
        aoi1_scenes = df.loc[df['aoi'] == 'aoi1', 'scene'].tolist()
        self.assertEqual(sorted(['s1', 's2', 's3']), sorted(aoi1_scenes))


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class GroupbyTest(unittest.TestCase):
    """Example: collect the values of one column into a list per group."""

    def test_groupby(self):
        """Group by ``id`` and gather each group's ``source`` values."""
        algo_specs = {'id': ['a1', 'a1', 'a1', 'a2', 'a2'],
                      'source': ['s1', 's2', 's3', 's3', 's4']}
        algo_specs_df = pd.DataFrame(algo_specs)

        # Group once and reuse the grouped object (it was previously built
        # twice, with the first result left unused).
        groupby_df = algo_specs_df.groupby(by='id')
        # The result is looked up by group id below ('a1', 'a2').
        sources_by_algo = groupby_df['source'].apply(list)

        self.assertEqual(sorted(['s1', 's2', 's3']),
                         sorted(sources_by_algo['a1']))
        self.assertEqual(sorted(['s3', 's4']),
                         sorted(sources_by_algo['a2']))


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class InnerJoinTest(unittest.TestCase):
    """Examples: inner joins with ``DataFrame.merge(how='inner')``."""

    def _assert_column(self, expected, frame, column):
        """Compare one column of ``frame`` to ``expected``, ignoring order."""
        self.assertEqual(sorted(expected), sorted(frame[column].tolist()))

    def test_inner_join_one_to_one(self):
        """Every left id has exactly one matching right id."""
        left = pd.DataFrame({'id': ['id1', 'id2', 'id3'],
                             'data': [1, 2, 3]})
        right = pd.DataFrame({'id': ['id1', 'id2', 'id3'],
                              'data': [10, 20, 30]})

        joined = left.merge(right, on='id', how='inner')

        # Columns are id, data_x (left) and data_y (right).
        self.assertEqual((3, 3), joined.shape)
        self._assert_column(['id1', 'id2', 'id3'], joined, 'id')
        self._assert_column([1, 2, 3], joined, 'data_x')
        self._assert_column([10, 20, 30], joined, 'data_y')

    def test_inner_join_not_all_join(self):
        """The left frame has an extra id ('id4') with no right match."""
        left = pd.DataFrame({'id': ['id1', 'id2', 'id3', 'id4'],
                             'data': [1, 2, 3, 4]})
        right = pd.DataFrame({'id': ['id1', 'id2', 'id3'],
                              'data': [10, 20, 30]})

        joined = left.merge(right, on='id', how='inner')

        # The unmatched 'id4' row is absent from the result.
        self.assertEqual((3, 3), joined.shape)
        self._assert_column(['id1', 'id2', 'id3'], joined, 'id')
        self._assert_column([1, 2, 3], joined, 'data_x')
        self._assert_column([10, 20, 30], joined, 'data_y')

    def test_inner_join_composite(self):
        """Join on the two-column key (``id1``, ``id2``)."""
        left = pd.DataFrame({'id1': ['id1', 'id2', 'id3'],
                             'id2': ['idA', 'idB', 'idC'],
                             'data': [1, 2, 3]})
        right = pd.DataFrame({'id1': ['id1', 'id2', 'id4'],
                              'id2': ['idA', 'idB', 'idD'],
                              'data': [10, 20, 40]})

        joined = left.merge(right, on=['id1', 'id2'], how='inner')

        # Two key pairs match; columns are id1, id2, data_x and data_y.
        self.assertEqual((2, 4), joined.shape)
        self._assert_column(['id1', 'id2'], joined, 'id1')
        self._assert_column(['idA', 'idB'], joined, 'id2')
        self._assert_column([1, 2], joined, 'data_x')
        self._assert_column([10, 20], joined, 'data_y')


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class IsInTest(unittest.TestCase):
    """Example: test each row's value against that row's own list."""

    def test_isin(self):
        """Flag rows whose ``source`` appears in their ``valid_sources``."""
        algo_specs = {'id': ['a1', 'a1', 'a1', 'a2', 'a2'],
                      'source': ['s1', 's2', 's3', 's3', 's4'],
                      'valid_sources': [['s1', 's2']] * 5}
        algo_specs_df = pd.DataFrame(algo_specs)

        # Series.isin() tests against one flat collection of values; it does
        # not pair row i with row i's list, and the list cells themselves are
        # unhashable. Build the per-row membership mask explicitly instead.
        is_valid_sources_mask = pd.Series(
            [source in valid_sources
             for source, valid_sources in zip(algo_specs_df['source'],
                                              algo_specs_df['valid_sources'])],
            index=algo_specs_df.index)

        # Only 's1' and 's2' are listed as valid for every row.
        self.assertEqual([True, True, False, False, False],
                         is_valid_sources_mask.tolist())
        self.assertEqual(
            ['s1', 's2'],
            algo_specs_df.loc[is_valid_sources_mask, 'source'].tolist())


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class LeftRightJoinTest(unittest.TestCase):
    """Examples: left and right joins with ``DataFrame.merge``."""

    def test_left_join(self):
        """A left join keeps every left row, matched or not."""
        left = pd.DataFrame({'id': ['id1', 'id2', 'id3', 'id4'],
                             'data': [1, 2, 3, 4]})
        right = pd.DataFrame({'id': ['id1', 'id2', 'id3'],
                              'data': [1, 2, 3]})

        joined = left.merge(right, left_on='id', right_on='id', how='left')

        self.assertEqual((4, 3), joined.shape)
        self.assertEqual(sorted(['id1', 'id2', 'id3', 'id4']),
                         sorted(joined['id'].tolist()))
        self.assertEqual(sorted([1, 2, 3, 4]),
                         sorted(joined['data_x'].tolist()))
        # 'id4' has no right-hand row, so its data_y is NaN; drop the missing
        # value before comparing.
        matched_right = joined['data_y'].dropna().tolist()
        self.assertEqual(sorted([1, 2, 3]), sorted(matched_right))

    def test_right_join(self):
        """A right join keeps only the rows present on the right side."""
        left = pd.DataFrame({'id': ['id1', 'id2', 'id3', 'id4'],
                             'data': [1, 2, 3, 4]})
        right = pd.DataFrame({'id': ['id1', 'id2', 'id3'],
                              'data': [1, 2, 3]})

        joined = left.merge(right, left_on='id', right_on='id', how='right')

        self.assertEqual((3, 3), joined.shape)
        self.assertEqual(sorted(['id1', 'id2', 'id3']),
                         sorted(joined['id'].tolist()))
        self.assertEqual(sorted([1, 2, 3]),
                         sorted(joined['data_x'].tolist()))
        self.assertEqual(sorted([1, 2, 3]),
                         sorted(joined['data_y'].tolist()))


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class MaskTest(unittest.TestCase):
    """Example: select rows with a boolean mask."""

    def test_query(self):
        """Keep the rows whose ``id`` is 'wf1' or 'wf3'."""
        wf_df = pd.DataFrame({'id': ['wf1', 'wf2', 'wf3'],
                              'states': ['s1', 's2', 's3']})

        # Element-wise OR of two comparisons gives one boolean per row.
        keep = (wf_df['id'] == 'wf1') | (wf_df['id'] == 'wf3')
        mask_df = wf_df[keep]

        expected_rows = [('wf1', 's1'), ('wf3', 's3')]
        expected_df = pd.DataFrame(expected_rows, columns=['id', 'states'])

        # Compare column by column, ignoring row order.
        for column in ('id', 'states'):
            actual = sorted(mask_df[column].tolist())
            wanted = sorted(expected_df[column].tolist())
            self.assertEqual(actual, wanted)


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class QueryTest(unittest.TestCase):
    """Example: select rows with ``DataFrame.query``."""

    def test_query(self):
        """Keep the rows whose ``id`` is 'wf1' or 'wf3'."""
        wf_df = pd.DataFrame({'id': ['wf1', 'wf2', 'wf3'],
                              'states': ['s1', 's2', 's3']})

        # The row filter is written as a string expression over column names.
        expression = 'id == "wf1" or id == "wf3"'
        query_df = wf_df.query(expression)

        expected_rows = [('wf1', 's1'), ('wf3', 's3')]
        expected_df = pd.DataFrame(expected_rows, columns=['id', 'states'])

        # Compare column by column, ignoring row order.
        for column in ('id', 'states'):
            actual = sorted(query_df[column].tolist())
            wanted = sorted(expected_df[column].tolist())
            self.assertEqual(actual, wanted)


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class ReindexTest(unittest.TestCase):
    """Example: reorder rows with ``DataFrame.reindex``."""

    def test_reindex(self):
        """Rows come back in the order of the labels given to reindex()."""
        ids = ['wf1', 'wf2', 'wf3']
        # The ids serve both as a regular column and as the row index.
        wf_df = pd.DataFrame({'id': ids, 'states': ['s1', 's2', 's3']},
                             index=ids)

        wf_order = ['wf2', 'wf3', 'wf1']
        reordered_ids = wf_df.reindex(wf_order)['id'].tolist()

        self.assertEqual(wf_order, reordered_ids)


if __name__ == '__main__':
    unittest.main(verbosity=2)
#!/usr/bin/env python
import unittest
import pandas as pd
class ToDictTest(unittest.TestCase):
    """Example: convert a DataFrame to a list of per-row dicts."""

    def test_to_dict(self):
        """``to_dict(orient='records')`` yields one dict per row."""

        def record_id(record):
            # Dicts cannot be ordered against each other on Python 3, so the
            # records are sorted by their 'id' value instead.
            return record['id']

        wf_data = {'id': ['wf1', 'wf2', 'wf3'],
                   'states': ['s1', 's2', 's3']}
        wf_df = pd.DataFrame(wf_data)

        wf_dict = wf_df.to_dict(orient='records')

        expected = [{'id': 'wf1', 'states': 's1'},
                    {'id': 'wf2', 'states': 's2'},
                    {'id': 'wf3', 'states': 's3'}]
        # Both sides are sorted so the comparison ignores record order.
        self.assertEqual(sorted(expected, key=record_id),
                         sorted(wf_dict, key=record_id))


if __name__ == '__main__':
    unittest.main(verbosity=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment