Skip to content

Instantly share code, notes, and snippets.

@Barry1
Last active May 25, 2020 14:44
Show Gist options
  • Save Barry1/08c9d5d874bdfd586b311b6a22dd867c to your computer and use it in GitHub Desktop.
Save Barry1/08c9d5d874bdfd586b311b6a22dd867c to your computer and use it in GitHub Desktop.
some helpful ideas for H2O
import h2o
import pandas
# Placeholder input data: an empty frame that is meant to be replaced by the
# real data set before the script is run.
Inputdaten = pandas.DataFrame()
# Name of the column to be predicted; it is passed as ``y`` to AutoML below.
# NOTE(review): 'tbd' looks like a "to be determined" placeholder - set it to
# a real column of Inputdaten.
PREDICT_COL_NAME = 'tbd'
def pddf2h2odf(pddf: pandas.DataFrame) -> h2o.H2OFrame:
    """Convert a pandas DataFrame to an H2OFrame, removing spurious duplicates.

    Sometimes H2O adds duplicated rows when importing a pandas DataFrame.
    Workaround taken from https://stackoverflow.com/a/59588925/617339:
    the index is uploaded as ordinary column(s), rows that come back
    duplicated are dropped, and the helper index column(s) are removed again.

    Args:
        pddf: The pandas DataFrame to upload. Its index may be named,
            unnamed or a MultiIndex.
            NOTE(review): the duplicate detection presumably relies on the
            index being unique - confirm for your data.

    Returns:
        An H2OFrame holding the columns of ``pddf`` (without the index).
    """
    flat = pddf.reset_index()
    # reset_index() inserts one column per index level at the front of the
    # frame. Reading the names from the result (instead of pddf.index.name)
    # also works for unnamed indexes, where pandas picks "index"/"level_N",
    # and for MultiIndex, where index.name is None.
    index_columns = [str(col) for col in flat.columns[:pddf.index.nlevels]]

    h2odf = h2o.H2OFrame(flat)
    if len(h2odf) != len(pddf):
        # Re-import to find out which rows H2O duplicated; the re-imported
        # frame has a default positional index, so the labels are row numbers.
        reimport = h2odf.as_data_frame()
        duplicate_rows = reimport.index[reimport.duplicated()].tolist()
        h2odf = h2odf.drop(duplicate_rows, axis=0)

    # Remove the helper index column(s) again, one name at a time.
    for column in index_columns:
        h2odf = h2odf.drop(column)
    return h2odf
# Start a local H2O server for the duration of the script; the context
# manager takes care of releasing it when the block is left.
with h2o.backend.H2OLocalServer.start(verbose=__debug__, name="H2O-Cluster") as hs:
    if __debug__:
        print("==========SERVER-INFO==========")
        print(hs)
        print("===============================")
    # Connect to the server just started; all H2O work happens inside.
    with h2o.connect(server=hs, verbose=__debug__) as hc:
        if __debug__:
            print("==========CONNECTION-INFO==========")
            print(hc)
            print("===================================")
        DATA = pddf2h2odf(Inputdaten)
        # http://archive.is/hBLPg
        print(DATA.describe())
        # No ``x`` is passed to train(): the choice of predictor columns is
        # left to AutoML's default. ``max_models`` is likewise left unset.
        aml = h2o.automl.H2OAutoML(seed=1)
        aml.train(y=PREDICT_COL_NAME, training_frame=DATA)
        print(aml.leader)
        # For classification targets the prediction frame carries per-class
        # probability columns next to "predict"; take only the predicted
        # value so it always fits into the single 'H2O' column.
        predictions = aml.predict(test_data=DATA).as_data_frame()
        Inputdaten['H2O'] = predictions['predict'].to_numpy()
        print(Inputdaten[[PREDICT_COL_NAME, 'H2O']])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment