Last active
October 21, 2017 01:28
-
-
Save mindey/f06f7c9d2c1aa17950dc16a90f6a9acb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy
import pandas
import xarray
# Two variables, 'x' and 'y', laid out along one shared dimension whose
# name is None (i.e. an unnamed dimension).
ds = xarray.Dataset({
    'x': ([None], [1, 2, 3]),
    'y': ([None], [4, 5, 6]),
})
# *is equivalent to*
# (the same two columns, on pandas' default integer index)
df = pandas.DataFrame({
    'x': [1, 2, 3],
    'y': [4, 5, 6],
})
# check (`==` compares element-wise; the results are not assigned to anything):
df.to_xarray() == ds
ds.to_dataframe() == df
### More Usefully: ###
# 'x' lives on the 'space' dimension and 'y' on the 'time' dimension, so the
# flattened frame below has one row per (space, time) pair.
ds = xarray.Dataset({
    'x': (['space'], [1, 2]),
    'y': (['time'], [3, 4, 5]),
})
ds.to_dataframe()
#             x  y
# space time
# 0     0     1  3
#       1     1  4
#       2     1  5
# 1     0     2  3
#       1     2  4
#       2     2  5
# Or, working from scratch:
# (each variable is a DataArray labelled 'a', 'b', 'c' along the 'index' dimension)
ds = xarray.Dataset({
    'x': xarray.DataArray([1, 2, 3], coords=[['a', 'b', 'c']], dims=['index']),
    'y': xarray.DataArray([4, 5, 6], coords=[['a', 'b', 'c']], dims=['index']),
})
# The same kind of Dataset in (dims, data) shorthand. The 'a', 'b', 'c' labels
# are passed via `coords`; without them the 'index' dimension would carry no
# labels and the result would not match a DataFrame indexed by 'a', 'b', 'c'.
ds = xarray.Dataset(
    {'x': (['index'], [1, 2, 3]),
     'y': (['index'], [4, 5, 6])},
    coords={'index': ['a', 'b', 'c']},
)
# *is equivalent to*
# (two columns on an index named 'index' with labels 'a', 'b', 'c')
df = pandas.DataFrame(
    {'x': [1, 2, 3],
     'y': [4, 5, 6]},
    index=pandas.Index(['a', 'b', 'c'], name='index'),
)
# Examples of operations:
# Add a third variable / column 'z', labelled 'a', 'b', 'c'.
ds['z'] = xarray.DataArray([7, 8, 9], coords=[['a', 'b', 'c']], dims=['index'])
# *is* #
df['z'] = pandas.Series([7, 8, 9], index=pandas.Index(['a', 'b', 'c'], name='index'))
# Computation:
# Summing over the 'index' dimension leaves one scalar per variable
# (the pasted output lists the 'x' and 'y' variables only).
ds.sum(dim='index')
# Dimensions:  ()
# Data variables:
#     x        int64 6
#     y        int64 15

# The pandas counterpart: summing down the rows leaves one value per column.
df.sum(axis=0)
# x     6
# y    15
# dtype: int64
# In case of .apply, we go over variables,
# i.e., equivalent to df.apply(lambda col: col, axis=0)
# NOTE: newer xarray releases provide Dataset.map for this and deprecate
# Dataset.apply; .apply is kept here so the example still runs on old versions.
ds.apply(lambda var: var**2)
# Dimensions:  (index: 3)
# Coordinates:
#   * index    (index) <U1 'a' 'b' 'c'
# Data variables:
#     x        (index) int64 1 4 9
#     y        (index) int64 16 25 36

# is #
df.apply(lambda col: col**2, axis=0)
#        x   y
# index
# a      1  16
# b      4  25
# c      9  36
# Another example:
# The function is called once per variable / column, so a reduction inside
# it yields one scalar for each.
ds.apply(lambda var: var.sum())
# Dimensions:  ()
# Data variables:
#     x        int64 6
#     y        int64 15

df.apply(lambda col: col.sum(), axis=0)
# x     6
# y    15
# dtype: int64
# However, in xarray, broadcasting and alignment across dimensions happen automatically:
# 'x' is defined on 'index' and 'y' on 'index2', so the flattened frame has
# one row per (index, index2) pair.
xarray.Dataset({
    'x': xarray.DataArray([1, 2, 3], coords=[['a', 'b', 'c']], dims=['index']),
    'y': xarray.DataArray([4, 5], coords=[['a', 'b']], dims=['index2']),
}).to_dataframe()
#               x  y
# index index2
# a     a       1  4
#       b       1  5
# b     a       2  4
#       b       2  5
# c     a       3  4
#       b       3  5
# And here is a more professional usage example:
import pandas | |
import xarray | |
# Create some dimensions: 10 points on [-10, 10] and 20 points on [-20, 20].
# numpy is used directly: the `pandas.np` alias was deprecated in pandas 1.0
# and is no longer available in pandas 2.x.
x = numpy.linspace(-10, 10, 10)
y = numpy.linspace(-20, 20, 20)
# 2-D coordinate grids of shape (len(x), len(y)):
# xx[i, j] == x[i] and yy[i, j] == y[j]
(yy, xx) = numpy.meshgrid(y, x)
# Make two different DataArrays with equal dimensions
# (numpy.random replaces the removed `pandas.np.random` alias)
var1 = xarray.DataArray(numpy.random.randn(len(x), len(y)), coords=[x, y], dims=['x', 'y'])
var2 = xarray.DataArray(-xx**2 + yy**2, coords=[x, y], dims=['x', 'y'])
# Save one DataArray as dataset
ds = var1.to_dataset(name='var1')
# Add second DataArray to existing dataset (ds)
ds['var2'] = var2
# Flatten to a DataFrame indexed by (x, y). var1 is random, so its values
# differ from run to run; var2 is deterministic.
ds.to_dataframe()
#                            var1        var2
# x          y
# -10.000000 -20.000000  0.250051  300.000000
#            -17.894737 -0.136954  220.221607
#            -15.789474 -0.156218  149.307479
#            -13.684211 -0.214556   87.257618
#            -11.578947 -1.011103   34.072022
# ...
#             13.684211 -0.430594   87.257618
#             15.789474  0.792872  149.307479
#             17.894737 -0.648614  220.221607
#             20.000000 -0.271610  300.000000
# [200 rows x 2 columns]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment