martindurant · February 15, 2017 18:22
diff --git a/simplest_dask_gcsfs.py b/simplest_dask_gcsfs.py
 import dask.dataframe as dd
 import gcsfs  # registers with dask filesystems

 # Initial authentication
 # gcs = gcsfs.core.GCSFileSystem(project='continuum-compute',
 # token='/Users/mdurant/.config/gcloud/application_default_credentials.json')

 # Read the iris CSV files from S3 with a tiny block size so that the input is
 # split into more than one block.  ``dd`` is the ``dask.dataframe`` alias
 # imported at the top of the script.
 df = dd.read_csv('s3://blaze-data/iris/*', blocksize=2500,
                  names=['petal_length', 'sepal_length', 'petal_width',
                         'sepal_width', 'species'])

 # Write the frame to GCS as parquet and read it straight back; the GCS
 # project is handed to the filesystem through ``storage_options``.
 df.to_parquet('gs://dask_example_data/iris',
               storage_options={'project': 'continuum-compute'})
 df2 = dd.read_parquet('gs://dask_example_data/iris',
                       storage_options={'project': 'continuum-compute'})
 # Suggestion: accept the project as part of the URL, e.g.
 # gs://continuum-compute@dask_example_data/iris -- this does not yet work.

 df2.compute()
	import dask.dataframe as dd
	import gcsfs # registers with dask filesystems

	# Initial authentication
	# gcs = gcsfs.core.GCSFileSystem(project='continuum-compute',
	# token='/Users/mdurant/.config/gcloud/application_default_credentials.json')

	# Read the iris CSV files from S3 with a tiny block size so that the input is
	# split into more than one block.  ``dd`` is the ``dask.dataframe`` alias
	# imported at the top of the script.
	df = dd.read_csv('s3://blaze-data/iris/*', blocksize=2500,
	                 names=['petal_length', 'sepal_length', 'petal_width',
	                        'sepal_width', 'species'])

	# Write the frame to GCS as parquet and read it straight back; the GCS
	# project is handed to the filesystem through ``storage_options``.
	df.to_parquet('gs://dask_example_data/iris',
	              storage_options={'project': 'continuum-compute'})
	df2 = dd.read_parquet('gs://dask_example_data/iris',
	                      storage_options={'project': 'continuum-compute'})
	# Suggestion: accept the project as part of the URL, e.g.
	# gs://continuum-compute@dask_example_data/iris -- this does not yet work.

	df2.compute()