mindey · October 21, 2017 01:28
diff --git a/xarray_fundamentals.py b/xarray_fundamentals.py
 import pandas
 import xarray

 ds = xarray.Dataset(
  {'x': ([None], [1,2,3] ),
   'y': ([None], [4,5,6] )},
 )

 # *is equivalent to*

 df = pandas.DataFrame(
  {'x': [1,2,3],
   'y': [4,5,6]}
 )

 # check:
 df.to_xarray() == ds
 ds.to_dataframe() == df

 ### More Usefully: ###

 ds = xarray.Dataset(
  {'x': (['space'], [1,2]),
   'y': (['time'], [3,4,5])})

 ds.to_dataframe()

            x  y
 space time      
 0     0     1  3
      1     1  4
      2     1  5
 1     0     2  3
      1     2  4
      2     2  5

 # Or, working from scratch:

 ds = xarray.Dataset(
  {'x': xarray.DataArray([1,2,3], coords=[['a','b','c']], dims=['index']), 
   'y': xarray.DataArray([4,5,6], coords=[['a','b','c']], dims=['index'])}
 )

 ds = xarray.Dataset(
  {'x': (['index'], [1,2,3]),
   'y': (['index'], [4,5,6])}
 )

 # *is equivalent to*

 df = pandas.DataFrame(
  {'x': [1,2,3],
   'y': [4,5,6]},
  index=pandas.Index(['a','b','c'], name='index')
 )



 # Examples of operations:

 ds['z'] = xarray.DataArray([7,8,9], coords=[['a','b','c']], dims=['index'])

 # *is* #

 df['z'] = pandas.Series([7,8,9], pandas.Index(['a','b','c'], name='index'))


 # Computation:

 ds.sum(dim='index')

 Dimensions:  ()
 Data variables:
    x        int64 6
    y        int64 15

 df.sum(axis=0)

 x     6
 y    15
 dtype: int64

 # In case of .apply, we go over variables,
 # i.e., equivalent to df.apply(lambda col: col, axis=0)

 ds.apply(lambda var: var**2)

 Dimensions:  (index: 3)
 Coordinates:
  * index    (index) <U1 'a' 'b' 'c'
 Data variables:
    x        (index) int64 1 4 9
    y        (index) int64 16 25 36
    
 # is #

 df.apply(lambda col: col**2, axis=0)

       x   y
 index       
 a      1  16
 b      4  25
 c      9  36

 # Another example:

 ds.apply(lambda var: var.sum())

 Dimensions:  ()
 Data variables:
    x        int64 6
    y        int64 15

 df.apply(lambda col: col.sum(), axis=0)

 x     6
 y    15
 dtype: int64

  
  
 # However, in xarray, broadcasting and alignment across dimensions happen automatically:

 xarray.Dataset(
  {'x': xarray.DataArray([1,2,3], coords=[['a','b','c']], dims=['index']),
   'y': xarray.DataArray([4,5], coords=[['a','d']], dims=['index2'])}).to_dataframe()

              x  y
 index index2      
 a     a       1  4
      d       1  5
 b     a       2  4
      d       2  5
 c     a       3  4
      d       3  5


 # And here is a more professional usage example:

 import pandas
 import xarray

 # Create some dimensions
 x = pandas.np.linspace(-10,10,10)
 y = pandas.np.linspace(-20,20,20)
 (yy, xx) = pandas.np.meshgrid(y,x)

 # Make two different DataArrays with equal dimensions
 var1 = xarray.DataArray(pandas.np.random.randn(len(x),len(y)), coords=[x, y], dims=['x','y'])
 var2 = xarray.DataArray(-xx**2+yy**2, coords=[x, y], dims=['x','y'])

 # Save one DataArray as dataset
 ds = var1.to_dataset(name='var1')

 # Add second DataArray to existing dataset (ds)
 ds['var2'] = var2

 # View the combined dataset as a dataframe:
 ds.to_dataframe()


                           var1        var2
 x          y                               
 -10.000000 -20.000000  0.250051  300.000000
           -17.894737 -0.136954  220.221607
           -15.789474 -0.156218  149.307479
           -13.684211 -0.214556   87.257618
           -11.578947 -1.011103   34.072022
 ...
            13.684211 -0.430594   87.257618
            15.789474  0.792872  149.307479
            17.894737 -0.648614  220.221607
            20.000000 -0.271610  300.000000

 [200 rows x 2 columns]
	import pandas
	import xarray

	ds = xarray.Dataset(
	{'x': ([None], [1,2,3] ),
	'y': ([None], [4,5,6] )},
	)

	# is equivalent to

	df = pandas.DataFrame(
	{'x': [1,2,3],
	'y': [4,5,6]}
	)

	# check:
	df.to_xarray() == ds
	ds.to_dataframe() == df

	### More Usefully: ###

	ds = xarray.Dataset(
	{'x': (['space'], [1,2]),
	'y': (['time'], [3,4,5])})

	ds.to_dataframe()

	            x  y
	space time
	0     0     1  3
	      1     1  4
	      2     1  5
	1     0     2  3
	      1     2  4
	      2     2  5

	# Or, working from scratch:

	ds = xarray.Dataset(
	{'x': xarray.DataArray([1,2,3], coords=[['a','b','c']], dims=['index']),
	'y': xarray.DataArray([4,5,6], coords=[['a','b','c']], dims=['index'])}
	)

	ds = xarray.Dataset(
	{'x': (['index'], [1,2,3]),
	'y': (['index'], [4,5,6])}
	)

	# is equivalent to

	df = pandas.DataFrame(
	{'x': [1,2,3],
	'y': [4,5,6]},
	index=pandas.Index(['a','b','c'], name='index')
	)



	# Examples of operations:

	ds['z'] = xarray.DataArray([7,8,9], coords=[['a','b','c']], dims=['index'])

	# is #

	df['z'] = pandas.Series([7,8,9], pandas.Index(['a','b','c'], name='index'))


	# Computation:

	ds.sum(dim='index')

	Dimensions:  ()
	Data variables:
	    x        int64 6
	    y        int64 15

	df.sum(axis=0)

	x     6
	y    15
	dtype: int64

	# In case of .apply, we go over variables,
	# i.e., equivalent to df.apply(lambda col: col, axis=0)

	ds.apply(lambda var: var**2)

	Dimensions:  (index: 3)
	Coordinates:
	  * index    (index) <U1 'a' 'b' 'c'
	Data variables:
	    x        (index) int64 1 4 9
	    y        (index) int64 16 25 36

	# is #

	df.apply(lambda col: col**2, axis=0)

	       x   y
	index
	a      1  16
	b      4  25
	c      9  36

	# Another example:

	ds.apply(lambda var: var.sum())

	Dimensions:  ()
	Data variables:
	    x        int64 6
	    y        int64 15

	df.apply(lambda col: col.sum(), axis=0)

	x     6
	y    15
	dtype: int64



	# However, in xarray, broadcasting and alignment across dimensions happen automatically:

	xarray.Dataset(
	{'x': xarray.DataArray([1,2,3], coords=[['a','b','c']], dims=['index']),
	'y': xarray.DataArray([4,5], coords=[['a','d']], dims=['index2'])}).to_dataframe()

	              x  y
	index index2
	a     a       1  4
	      d       1  5
	b     a       2  4
	      d       2  5
	c     a       3  4
	      d       3  5


	# And here is a more professional usage example:

	import pandas
	import xarray

	# Create some dimensions
	x = pandas.np.linspace(-10,10,10)
	y = pandas.np.linspace(-20,20,20)
	(yy, xx) = pandas.np.meshgrid(y,x)

	# Make two different DataArrays with equal dimensions
	var1 = xarray.DataArray(pandas.np.random.randn(len(x),len(y)), coords=[x, y], dims=['x','y'])
	var2 = xarray.DataArray(-xx**2+yy**2, coords=[x, y], dims=['x','y'])

	# Save one DataArray as dataset
	ds = var1.to_dataset(name='var1')

	# Add second DataArray to existing dataset (ds)
	ds['var2'] = var2

	# View the combined dataset as a dataframe:
	ds.to_dataframe()


	                           var1        var2
	x          y
	-10.000000 -20.000000  0.250051  300.000000
	           -17.894737 -0.136954  220.221607
	           -15.789474 -0.156218  149.307479
	           -13.684211 -0.214556   87.257618
	           -11.578947 -1.011103   34.072022
	...
	            13.684211 -0.430594   87.257618
	            15.789474  0.792872  149.307479
	            17.894737 -0.648614  220.221607
	            20.000000 -0.271610  300.000000

	[200 rows x 2 columns]