Skip to content

Instantly share code, notes, and snippets.

View chathurawidanage's full-sized avatar
🏠
Working from home

Chathura Widanage chathurawidanage

🏠
Working from home
View GitHub Profile
from cProfile import label
import re
from datetime import datetime
host_mem = 73
dev_mem = 16
def plot(worker = '0'):
print("Plotting worker", worker)
logs = open("theseus_log/batch_log."+worker+".log")
from theseus import TheseusContext
tc = TheseusContext(config_options={
"EXECUTOR_THREADS": 1
}, output_type="cudf")
tc.create_table('ex', 'example.parquet')
tc.create_table('nation', 'nation.parquet')
tc.create_table('region', 'region.parquet')
@chathurawidanage
chathurawidanage / write_pq.py
Last active May 13, 2022 14:38
generate data
import types
import pyarrow.parquet as pq
import pyarrow as pa
import pandas as pd
import numpy as np
data = np.array(np.random.random_sample((5,)), dtype=np.float32)
data2 = np.array(np.random.random_sample((5,)), dtype=np.float32)
data3 = np.array(np.random.random_sample((5,)), dtype=np.float32)
@chathurawidanage
chathurawidanage / cylon.yaml
Created June 15, 2021 03:11
Cylon Kubernetes Deployment
apiVersion: kubeflow.org/v1alpha2
kind: MPIJob
metadata:
name: cylon-examples
spec:
slotsPerWorker: 1
cleanPodPolicy: Running
mpiReplicaSpecs:
Launcher:
replicas: 1
from pycylon import DataFrame, read_csv, CylonEnv
from pycylon.net import MPIConfig
env = CylonEnv(config=MPIConfig())
print("Hello from Worker : ", env.rank)
df1 = read_csv("file1.csv", slice=True)
df1.set_index([0])
df2 = read_csv("file2.csv", slice=True)
from pycylon import DataFrame, read_csv, CylonEnv
from pycylon.net import MPIConfig
env = CylonEnv(config=MPIConfig())
print("Hello from Worker : ", env.rank)
df1 = read_csv("file1_"+str(env.rank)+".csv")
df1.set_index([0])
df2 = read_csv("file2_"+str(env.rank)+".csv")
from pycylon import DataFrame, read_csv, CylonEnv
from pycylon.net import MPIConfig
env = CylonEnv(config=MPIConfig())
print("Hello from Worker : ", env.rank)
df1 = read_csv("file1.csv")
df1.set_index([0])
df2 = read_csv("file2.csv")
from pycylon import DataFrame, read_csv
df1 = read_csv("file1.csv")
df1.set_index([0])
df2 = read_csv("file2.csv")
df2.set_index([0])
join = df1.merge(df2, left_on=[0], right_on=[0])
print(join)
@chathurawidanage
chathurawidanage / dsp.cpp
Created May 12, 2021 17:23
Standalone DIDA dispatch
// #include "config.h"
// #include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <stdint.h>
#include <getopt.h>
// #include "Uncompress.h"
@chathurawidanage
chathurawidanage / mrg.cpp
Last active May 4, 2021 19:34
DIDA Merge | scone g++ -o mrg mrg.cpp -lstdc++ -ldl
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <queue>
#include <list>
#include <stdint.h>
#include <getopt.h>