Skip to content

Instantly share code, notes, and snippets.

@mindey
Last active October 21, 2017 01:28
Show Gist options
  • Save mindey/f06f7c9d2c1aa17950dc16a90f6a9acb to your computer and use it in GitHub Desktop.
Save mindey/f06f7c9d2c1aa17950dc16a90f6a9acb to your computer and use it in GitHub Desktop.
import pandas
import xarray
# Two 1-D variables, x and y, declared over the same dimension (named None).
ds = xarray.Dataset(dict(
    x=([None], [1, 2, 3]),
    y=([None], [4, 5, 6]),
))
# *is equivalent to*
# The pandas counterpart: two columns, x and y, on the default integer index.
df = pandas.DataFrame(
    {'x': [1, 2, 3],
     'y': [4, 5, 6]}
)
# check:
# Round-trip each object into the other library and compare.
# NOTE: `==` is element-wise on both sides, so each line evaluates to an
# object of booleans rather than a single True/False.
df.to_xarray() == ds
ds.to_dataframe() == df
### More Usefully: ###
# x is defined only along `space`, y only along `time`.
ds = xarray.Dataset(dict(
    x=(['space'], [1, 2]),
    y=(['time'], [3, 4, 5]),
))
ds.to_dataframe()
            x  y
space time
0     0     1  3
      1     1  4
      2     1  5
1     0     2  3
      1     2  4
      2     2  5
# Or, working from scratch:
# Explicit form: every variable is a DataArray carrying its own labels.
ds = xarray.Dataset(
    {'x': xarray.DataArray([1, 2, 3], coords=[['a', 'b', 'c']], dims=['index']),
     'y': xarray.DataArray([4, 5, 6], coords=[['a', 'b', 'c']], dims=['index'])}
)
# Shorthand form: (dims, values) tuples plus one shared `index` coordinate.
# The coordinate is required here; without it the dataset has no 'a', 'b', 'c'
# labels and would not match the labelled DataFrame.
ds = xarray.Dataset(
    {'x': (['index'], [1, 2, 3]),
     'y': (['index'], [4, 5, 6])},
    coords={'index': ['a', 'b', 'c']}
)
# *is equivalent to*
# Same data as a DataFrame whose row index is named 'index' and labelled a, b, c.
df = pandas.DataFrame(
    data=dict(x=[1, 2, 3], y=[4, 5, 6]),
    index=pandas.Index(list('abc'), name='index'),
)
# Examples of operations:
# Add a new variable `z` to the Dataset, labelled along `index` ...
ds['z'] = xarray.DataArray([7, 8, 9], dims=['index'], coords=[['a', 'b', 'c']])
# ... *is* the same as adding a new column `z` to the DataFrame.
df['z'] = pandas.Series([7, 8, 9], index=pandas.Index(['a', 'b', 'c'], name='index'))
# Computation:
ds.sum(dim='index')
Dimensions: ()
Data variables:
x int64 6
y int64 15
df.sum(axis=0)
x 6
y 15
dtype: int64
# In case of .apply, we go over variables,
# i.e., equivalent to df.apply(lambda col: col, axis=0)
# NOTE(review): recent xarray releases name this method Dataset.map and keep
# Dataset.apply only as a deprecated alias — confirm against the installed version.
ds.apply(lambda var: var**2)
Dimensions: (index: 3)
Coordinates:
* index (index) <U1 'a' 'b' 'c'
Data variables:
x (index) int64 1 4 9
y (index) int64 16 25 36
# is #
df.apply(lambda col: col**2, axis=0)
x y
index
a 1 16
b 4 25
c 9 36
# Another example:
ds.apply(lambda var: var.sum())
Dimensions: ()
Data variables:
x int64 6
y int64 15
df.apply(lambda col: col.sum(), axis=0)
x 6
y 15
dtype: int64
# However, in xarray, broadcasting and alignment across dimensions happen automatically:
# x is labelled along `index`, y along a separate dimension `index2`;
# the resulting frame has one row per (index, index2) pair.
xarray.Dataset({
    'x': xarray.DataArray([1, 2, 3], dims=['index'], coords=[['a', 'b', 'c']]),
    'y': xarray.DataArray([4, 5], dims=['index2'], coords=[['a', 'b']]),
}).to_dataframe()
              x  y
index index2
a     a       1  4
      b       1  5
b     a       2  4
      b       2  5
c     a       3  4
      b       3  5
# And here is a more professional usage example:
import numpy
import pandas
import xarray
# Create some dimensions
# numpy is used directly: the `pandas.np` alias was deprecated in pandas 1.0
# and removed in pandas 2.0.
x = numpy.linspace(-10, 10, 10)
y = numpy.linspace(-20, 20, 20)
# meshgrid defaults to 'xy' indexing, so passing (y, x) gives arrays of shape
# (len(x), len(y)): xx varies along axis 0, yy along axis 1.
(yy, xx) = numpy.meshgrid(y, x)
# Make two different DataArrays with equal dimensions
# (numpy.random replaces the removed `pandas.np.random` alias)
var1 = xarray.DataArray(numpy.random.randn(len(x), len(y)), coords=[x, y], dims=['x', 'y'])
var2 = xarray.DataArray(-xx**2 + yy**2, coords=[x, y], dims=['x', 'y'])
# Save one DataArray as dataset
ds = var1.to_dataset(name='var1')
# Add second DataArray to existing dataset (ds)
ds['var2'] = var2
                           var1        var2
x          y
-10.000000 -20.000000  0.250051  300.000000
           -17.894737 -0.136954  220.221607
           -15.789474 -0.156218  149.307479
           -13.684211 -0.214556   87.257618
           -11.578947 -1.011103   34.072022
...
            13.684211 -0.430594   87.257618
            15.789474  0.792872  149.307479
            17.894737 -0.648614  220.221607
            20.000000 -0.271610  300.000000
[200 rows x 2 columns]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment