sergiolucero · December 7, 2017 15:35
diff --git a/lens_example.py b/lens_example.py
 # from http://docs.sherlockml.com/libraries/lens/getting_started.html
 import lens
 import pandas as pd

 # Raw CSV columns to keep, in the order they should appear in the frame.
 selected_columns = ['id_x', 'text_x', 'hext_x', 'time',
                     'tint_y', 'hint_y', 'Co2_y', 'Ruido_y']
 # Raw name -> readable name; 'time' is not listed, so it keeps its name.
 readable_names = {
     'id_x': 'casa',
     'text_x': 'TempExt',
     'hext_x': 'HumedadExt',
     'tint_y': 'TempInt',
     'hint_y': 'HumedadInt',
     'Co2_y': 'CO2',
     'Ruido_y': 'Ruido',
 }
 df = pd.read_csv('300_casas_renam.csv.gz', compression='gzip')
 df = df[selected_columns].rename(columns=readable_names)

 # Split the indoor humidity readings into five bins, labelled in bin order
 # (seco=dry, bajo=low, medio=medium, alto=high, humedo=humid), and store
 # the resulting band names as plain strings rather than a categorical.
 humidity_labels = ['seco', 'bajo', 'medio', 'alto', 'humedo']
 humidity_bands = pd.cut(df['HumedadInt'], bins=5, labels=humidity_labels)
 df['Humedad'] = humidity_bands.astype(str)

 # Summarise the frame with lens and save the summary as JSON.
 # Timing noted by the original author: 1m19s for 100K casas on the basic
 # sherlock instance.
 # NOTE(review): scheduler='sync' presumably selects a synchronous
 # scheduler -- confirm against the lens documentation.
 ls = lens.summarise(df, scheduler='sync')
 ls.to_json('lens_summary.json')

 # Open an explorer on the summary and show its description and
 # correlation plot.
 explorer = lens.explore(ls)
 explorer.describe()
 explorer.correlation_plot()
	# from http://docs.sherlockml.com/libraries/lens/getting_started.html
	import lens
	import pandas as pd

	# Raw CSV columns to keep, in the order they should appear in the frame.
	selected_columns = [
		'id_x', 'text_x', 'hext_x', 'time',
		'tint_y', 'hint_y', 'Co2_y', 'Ruido_y',
	]
	# Raw name -> readable name; 'time' is not listed, so it keeps its name.
	readable_names = {
		'id_x': 'casa',
		'text_x': 'TempExt',
		'hext_x': 'HumedadExt',
		'tint_y': 'TempInt',
		'hint_y': 'HumedadInt',
		'Co2_y': 'CO2',
		'Ruido_y': 'Ruido',
	}
	df = pd.read_csv('300_casas_renam.csv.gz', compression='gzip')
	df = df[selected_columns].rename(columns=readable_names)

	# Split the indoor humidity readings into five bins, labelled in bin order
	# (seco=dry, bajo=low, medio=medium, alto=high, humedo=humid), and store
	# the resulting band names as plain strings rather than a categorical.
	humidity_labels = ['seco', 'bajo', 'medio', 'alto', 'humedo']
	humidity_bands = pd.cut(df['HumedadInt'], bins=5, labels=humidity_labels)
	df['Humedad'] = humidity_bands.astype(str)

	# Summarise the frame with lens and save the summary as JSON.
	# Timing noted by the original author: 1m19s for 100K casas on the basic
	# sherlock instance.
	# NOTE(review): scheduler='sync' presumably selects a synchronous
	# scheduler -- confirm against the lens documentation.
	ls = lens.summarise(df, scheduler='sync')
	ls.to_json('lens_summary.json')

	# Open an explorer on the summary and show its description and
	# correlation plot.
	explorer = lens.explore(ls)
	explorer.describe()
	explorer.correlation_plot()