This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| import random | |
| import string | |
| def make_data(start_date='2019-01-01',n_data=30,n_num_var=5,n_cat_var=5,n_cat_var_cardinality_upper=10): | |
| ''' Function to make some data and put it in a df | |
| ''' | |
| dates = pd.date_range(start_date,periods=n_data) | |
| df = pd.DataFrame() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_lines_multi(df,lw=2,pw=700,ph=400,t_str="hover,save,pan,box_zoom,reset,wheel_zoom",t_loc='above'): | |
| '''... | |
| ''' | |
| source = ColumnDataSource(df) | |
| col_names = source.column_names | |
| p = figure(x_axis_type="datetime",plot_width=pw, plot_height=ph,toolbar_location=t_loc, tools=t_str) | |
| p_dict = dict() | |
| for col, c, col_name in zip(df.columns,color,col_names): | |
| p_dict[col_name] = p.line('index', col, source=source, color=c,line_width=lw) | |
| p.add_tools(HoverTool( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import papermill as pm | |
| import multiprocessing | |
| import os | |
| import argparse | |
| import json | |
| def run_papermill(config): | |
| ''' Function to run notebook(s) in parallel using papermill. | |
| ''' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "config_bank": { | |
| "notebook": "notebooks/data_explorer.ipynb", | |
| "data_url": "https://raw.githubusercontent.com/andrewm4894/papermill_dev/master/data/bank-full.csv", | |
| "output_label": "bank" | |
| }, | |
| "config_adult": { | |
| "notebook": "notebooks/data_explorer.ipynb", | |
| "data_url": "https://raw.githubusercontent.com/andrewm4894/papermill_dev/master/data/adult.csv", | |
| "output_label": "adult" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import weka.core.Instances; | |
| import weka.core.converters.ConverterUtils.DataSource; | |
| import weka.filters.Filter; | |
| import weka.filters.unsupervised.attribute.Add; | |
| import java.util.ArrayList; | |
| import java.util.List; | |
| import java.util.Random; | |
| public class wekaDev { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import org.kohsuke.args4j.CmdLineParser; | |
| import org.kohsuke.args4j.Option; | |
| /** | |
| * Hello world! class that is parameterized (with defaults) using args4j. | |
| * Example cli usage: java -jar helloWorldParamaterized --msg='Hello arg4j!' | |
| */ | |
| public class helloWorldParamaterized | |
| { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import org.datavec.api.records.reader.RecordReader; | |
| import org.datavec.api.records.reader.impl.csv.CSVRecordReader; | |
| import org.datavec.api.split.FileSplit; | |
| import org.datavec.api.transform.TransformProcess; | |
| import org.datavec.api.transform.schema.Schema; | |
| import org.datavec.api.writable.Writable; | |
| import org.datavec.local.transforms.LocalTransformExecutor; | |
| import org.nd4j.linalg.io.ClassPathResource; | |
| import java.io.File; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package org.datavec.transform.basic; | |
| import org.datavec.api.records.reader.RecordReader; | |
| import org.datavec.api.records.reader.impl.csv.CSVRecordReader; | |
| import org.datavec.api.split.FileSplit; | |
| import org.datavec.api.transform.TransformProcess; | |
| import org.datavec.api.transform.schema.Schema; | |
| import org.datavec.api.transform.transform.sequence.SequenceOffsetTransform; | |
| import org.datavec.api.writable.Writable; | |
| import org.datavec.local.transforms.LocalTransformExecutor; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # this does not work: the outer braces form a set literal, and a dict is unhashable, so it raises a TypeError | |
| pipeline = { | |
| {"resample" : {"type" : "trans", "name" : "resample", "kwargs" : {"rule" : "1min"}}} | |
| } | |
| pipeline | |
| ''' | |
| --------------------------------------------------------------------------- | |
| TypeError Traceback (most recent call last) | |
| <ipython-input-10-7305ba79e664> in <module> | |
| 1 pipeline = { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from multiprocessing import Pool | |
| from functools import partial | |
| import numpy as np | |
| import pandas as pd | |
| def parallelize_dataframe(df, func, n_pool=4, col_subsets=None, join_how='outer',**kwargs): | |
| ''' | |
| Function to take a df, a function with args, and a list of column subsets to apply function to. | |
| The resulting list of dfs is then joined back together based on the df index. |