This repo demonstrates moving from pandas to Spark for big-data analysis.
Python - v3.5.2
Pandas - v0.19.2
Spark - v2.1.0
#!/bin/bash | |
# Install required software: refresh the apt package index, then install the
# fish shell non-interactively (-y answers "yes" to the confirmation prompt).
sudo apt-get update | |
sudo apt-get install fish -y | |
# Make fish the login shell.
# NOTE(review): chsh is run under sudo without a username, so it presumably
# changes root's shell rather than the invoking user's -- confirm the intent.
sudo chsh -s /usr/bin/fish | |
# Static IP addresses to set up for this machine:
#   LAN  - 192.168.0.99
#   WLAN - 192.168.0.199
#!/usr/bin/env bash | |
# Build and install Open MPI v4.0.0 from the upstream source tarball.
# NOTE(review): there is no `set -e`, so a failing step does not stop the
# steps that follow it.
# Download the release archive, unpack it and enter the source tree.
wget https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.0.tar.gz | |
tar -xvf openmpi-4.0.0.tar.gz | |
cd openmpi-4.0.0 | |
# Configure the build to install under /usr/local.
./configure --prefix=/usr/local | |
# Compile as the current user; only the install step is run with sudo.
make all | |
sudo make install | |
# Refresh the dynamic linker cache so the newly installed shared libraries
# can be resolved.
sudo ldconfig |
#!/usr/bin/env bash | |
# Remove nvidia-docker 1.0 and prepare the installation of nvidia-docker2.
#======================================================================================================================================== | |
# If nvidia-docker 1.0 is installed, it has to be removed together with all
# existing GPU containers. The pipeline below:
#   1. lists the names of volumes created by the nvidia-docker volume driver,
#   2. for each volume, lists the IDs of all containers (running or stopped)
#      that use it,
#   3. force-removes those containers.
# `xargs -r` skips the command entirely when there is no input.
docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f | |
# Purge the old nvidia-docker package, including its configuration files.
sudo apt-get purge -y nvidia-docker | |
# Add the package repositories: fetch the repository GPG key (-s: silent,
# -L: follow redirects) and pipe it to apt-key, which reads it from stdin ("-").
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \ | |
sudo apt-key add - |
# There is no proper documentation on installing Horovod with Spark support.
# Reference - https://github.com/uber/horovod/blob/master/docs/spark.md
# Corrupt Java installation - install Oracle Java 8 from the webupd8team PPA:
# register the PPA, refresh the package index, then install the installer package.
# NOTE(review): the install command has no -y, so it prompts for confirmation.
sudo apt-add-repository ppa:webupd8team/java | |
sudo apt-get update | |
sudo apt-get install oracle-java8-installer | |
# Install Spark: download the Spark 2.4.0 binary distribution built for Hadoop 2.7.
wget http://mirrors.estointernet.in/apache/spark/spark-2.4.0/spark-2.4.0-bin-hadoop2.7.tgz |
# Install fish: download the fish release-2 repository definition (RHEL 6
# build) into the yum repository directory, then install the package from it.
# NOTE(review): `yum install` is run without -y, so it prompts for confirmation.
cd /etc/yum.repos.d/ | |
sudo wget https://download.opensuse.org/repositories/shells:fish:release:2/RedHat_RHEL-6/shells:fish:release:2.repo | |
sudo yum install fish | |
# Update the locate database. `ionice -c3` runs updatedb in the idle I/O
# scheduling class so the scan only uses disk time nothing else wants.
sudo ionice -c3 updatedb | |
# Install g++ (the gcc72-c++ package).
sudo yum install gcc72-c++ |
#!/usr/bin/env bash | |
# Build and install Open MPI v4.0.0 from the upstream source tarball.
# NOTE(review): there is no `set -e`, so a failing step does not stop the
# steps that follow it.
# Download the release archive, unpack it and enter the source tree.
wget https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.0.tar.gz | |
tar -xvf openmpi-4.0.0.tar.gz | |
cd openmpi-4.0.0 | |
# Configure the build to install under /usr/local.
./configure --prefix=/usr/local | |
# Compile as the current user; only the install step is run with sudo.
make all | |
sudo make install | |
# Refresh the dynamic linker cache so the newly installed shared libraries
# can be resolved.
sudo ldconfig |
def humanize_time_delta(td_object): | |
seconds = td_object | |
periods = [ | |
('year', 60 * 60 * 24 * 365), | |
('month', 60 * 60 * 24 * 30), | |
('day', 60 * 60 * 24), | |
('hour', 60 * 60), | |
('minute', 60), | |
('second', 1), | |
('milli_second', 1 / 10 ** 3), |
import typing | |
import tensorflow as tf | |
def validate_fetch(fetch_ops: typing.Union[str, list, dict, tf.Tensor]): | |
if isinstance(fetch_ops, str): | |
if not len(fetch_ops) > 0: | |
raise Exception("Fetch Op is an empty string") | |
return fetch_ops | |
elif isinstance(fetch_ops, tf.Tensor): |
import re | |
def generate_label_from_name(name: str): | |
""" | |
:param name: Name for which label has to be generate | |
:return: | |
""" | |
caps_split = sum([list(match) for match in re.findall('([A-Z][^A-Z][a-z]*)|([0-9][A-Z][^A-Z][a-z]*)|([0-9][A-Z][a-z]*)', name)], []) |