One Paragraph of project description goes here
These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
# Build the image tagged "dockstack", passing the current UTC timestamp, the
# short hash of the checked-out commit and a version label as build arguments.
docker build \
  --tag dockstack \
  --build-arg BUILD_DATE="$(date --utc +"%Y-%m-%dT%H:%M:%SZ")" \
  --build-arg VCS_REF="$(git rev-parse --short HEAD)" \
  --build-arg VERSION="test" \
  .
Sending build context to Docker daemon 11.26kB | |
Step 1/23 : FROM ubuntu:16.04 | |
16.04: Pulling from library/ubuntu |
# Adapted from solution provided by http://stackoverflow.com/users/57719/chin-huang http://stackoverflow.com/a/31465939/348868 | |
# Scenario: You want to add a group to the AllowGroups list in sshd_config | |
# Before: | |
# AllowGroups Group1 | |
# After: | |
# AllowGroups Group1 Group2 | |
- name: Add Group to AllowGroups | |
replace: | |
backup: yes |
#!/usr/bin/env bash | |
################################################################################ | |
# Boilerplate Shell Script with getopt parsing | |
# | |
# This script is released to the Public Domain by Chad Walstrom | |
# Chad Walstrom <[email protected]>. | |
################################################################################ | |
# NOACT: initialised to 0 here.
NOACT=0
# NAME: the script's file name with everything from the first dot onwards
# removed. "$0" is quoted so paths containing spaces or glob characters survive
# intact, and "--" stops basename from treating a leading "-" as an option.
NAME=$(basename -- "$0" | sed 's/\(\..*\)$//')
# VERSION: version string of this script.
VERSION="0.1"
Author unknown. | |
1.) Algorithm Complexity: You need to know Big-O. If you struggle with | |
basic big-O complexity analysis, then you are almost guaranteed not to | |
get hired. | |
For more information on Algorithms you can visit: | |
http://www.topcoder.com/tc?module=Static&d1=tutorials&d2=alg_index | |
2.) Coding: You should know at least one programming language really | |
well, and it should preferably be C++ or Java. C# is OK too, since |
#!/usr/bin/env bash | |
set -e | |
check_finish() { | |
ID=$1 | |
while ! dask-yarn status "${ID}" 2>/dev/null | awk -v col=3 '{print $col}' | grep FINISHED; do | |
echo -e "Application ${ID} not finihsed" | |
sleep 5 | |
done | |
echo -e "Application ${ID} has finished" |
import os | |
import dask | |
import dask.dataframe as dd | |
import pandas as pd | |
import torch | |
from dask.distributed import Client | |
from transformers import RobertaForSequenceClassification, RobertaTokenizer | |
from . import ClusterType, TokensDataset, get_cluster |
from enum import Enum | |
from dask.distributed import Client, LocalCluster, SpecCluster | |
from dask_yarn import YarnCluster | |
class ClusterType(Enum): | |
YARN = 'yarn' | |
LOCAL = 'local' |
"""Main Entrypoint to submit to the Spark Cluster""" | |
import os | |
from typing import Tuple | |
import pandas as pd | |
import torch | |
from data_components.io.files.s3 import Client | |
from pyspark.sql import SparkSession | |
from pyspark.sql.functions import PandasUDFType, col, pandas_udf |
# Base image.
FROM amazoncorretto:8

# Point PySpark at python3 for both the driver and the general interpreter
# setting. Uses the key=value form; the space-separated ENV form is legacy.
ENV PYSPARK_DRIVER_PYTHON=python3 \
    PYSPARK_PYTHON=python3

# Update packages and install the development group plus Python tooling in a
# single layer. The previous standalone "yum -y update" and
# "yum -y groupinstall development" steps repeated work done here and only
# added extra layers. The yum cache is cleared in the same layer so it is not
# stored in the image.
RUN yum -y update \
    && yum -y group install "Development Tools" development \
    && yum -y install yum-utils which hostname python3-devel python-devel python3-pip python3-virtualenv \
    && yum clean all \
    && rm -rf /var/cache/yum