Skip to content

Instantly share code, notes, and snippets.

View andrewm4894's full-sized avatar
💭
Agentic

Andrew Maguire andrewm4894

💭
Agentic
View GitHub Profile
import pandas as pd
import numpy as np
import random
import string
# NOTE(review): scraped gist fragment — Python indentation was lost in extraction
# and the function body is cut off below (df is created but never filled or returned).
def make_data(start_date='2019-01-01',n_data=30,n_num_var=5,n_cat_var=5,n_cat_var_cardinality_upper=10):
''' Generate a DataFrame of synthetic daily time-series data.

start_date : first date of the daily index; n_data : number of rows.
n_num_var / n_cat_var / n_cat_var_cardinality_upper presumably control how
many numeric/categorical columns are generated and the category cardinality —
the code that uses them is not visible in this excerpt; confirm against the
full gist.
'''
dates = pd.date_range(start_date,periods=n_data)
df = pd.DataFrame()
# NOTE(review): scraped gist fragment — indentation was lost and the block ends
# mid-call at `p.add_tools(HoverTool(`. The bokeh names (ColumnDataSource,
# figure, HoverTool) and the `color` palette are imported/defined outside
# this excerpt.
def plot_lines_multi(df,lw=2,pw=700,ph=400,t_str="hover,save,pan,box_zoom,reset,wheel_zoom",t_loc='above'):
'''Plot each column of df as a line glyph on one Bokeh datetime figure.

lw : line width. pw / ph : plot width / height in pixels.
t_str : comma-separated Bokeh tool names. t_loc : toolbar location.
'''
source = ColumnDataSource(df)
col_names = source.column_names
p = figure(x_axis_type="datetime",plot_width=pw, plot_height=ph,toolbar_location=t_loc, tools=t_str)
# one renderer per column, keyed by column name; the x-axis is the df index
p_dict = dict()
# NOTE(review): ColumnDataSource.column_names typically includes 'index' as an
# extra first entry, so zipping it against df.columns may misalign the keys in
# p_dict by one — verify against the full gist.
for col, c, col_name in zip(df.columns,color,col_names):
p_dict[col_name] = p.line('index', col, source=source, color=c,line_width=lw)
p.add_tools(HoverTool(
import papermill as pm
import multiprocessing
import os
import argparse
import json
# NOTE(review): only the signature and docstring survive in this excerpt —
# the body (presumably calling pm.execute_notebook per config entry, possibly
# via multiprocessing given the imports above) is truncated.
def run_papermill(config):
''' Run notebook(s) in parallel using papermill.

config : expected to be one notebook-run configuration or a dict of them
(cf. the "config_bank"/"config_adult" JSON later in this file, each with
"notebook", "data_url" and "output_label" keys) — TODO confirm against the
full gist.
'''
{
"config_bank": {
"notebook": "notebooks/data_explorer.ipynb",
"data_url": "https://raw.githubusercontent.com/andrewm4894/papermill_dev/master/data/bank-full.csv",
"output_label": "bank"
},
"config_adult": {
"notebook": "notebooks/data_explorer.ipynb",
"data_url": "https://raw.githubusercontent.com/andrewm4894/papermill_dev/master/data/adult.csv",
"output_label": "adult"
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
public class wekaDev {
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
/**
 * Hello world! class that is parameterized (with defaults) using args4j.
 * Example cli usage: java -jar helloWorldParamaterized --msg='Hello args4j!'
 */
public class helloWorldParamaterized
{
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.writable.Writable;
import org.datavec.local.transforms.LocalTransformExecutor;
import org.nd4j.linalg.io.ClassPathResource;
import java.io.File;
package org.datavec.transform.basic;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.transform.transform.sequence.SequenceOffsetTransform;
import org.datavec.api.writable.Writable;
import org.datavec.local.transforms.LocalTransformExecutor;
# this does not work: the outer braces form a set literal, and a dict is unhashable, so it raises the TypeError shown below
pipeline = {
{"resample" : {"type" : "trans", "name" : "resample", "kwargs" : {"rule" : "1min"}}}
}
pipeline
'''
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-10-7305ba79e664> in <module>
1 pipeline = {
from multiprocessing import Pool
from functools import partial
import numpy as np
import pandas as pd
def parallelize_dataframe(df, func, n_pool=4, col_subsets=None, join_how='outer',**kwargs):
'''
Function to take a df, a function with args, and a list of column subsets to apply function to.
Resulting list of df's are then joined back together based on the df index.