One Paragraph of project description goes here
These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
| docker build \ | |
| --tag dockstack \ | |
| --build-arg BUILD_DATE=`date --utc +"%Y-%m-%dT%H:%M:%SZ"` \ | |
| --build-arg VCS_REF=`git rev-parse --short HEAD` \ | |
| --build-arg VERSION="test" \ | |
| . | |
| Sending build context to Docker daemon 11.26kB | |
| Step 1/23 : FROM ubuntu:16.04 | |
| 16.04: Pulling from library/ubuntu |
| # Adapted from solution provided by http://stackoverflow.com/users/57719/chin-huang http://stackoverflow.com/a/31465939/348868 | |
| # Scenario: You want to add a group to the AllowGroups list in sshd_config | |
| # Before: | |
| # AllowGroups Group1 | |
| # After: | |
| # AllowGroups Group1 Group2 | |
| - name: Add Group to AllowGroups | |
| replace: | |
| backup: yes |
| #!/usr/bin/env bash | |
| ################################################################################ | |
| # Boilerplate Shell Script with getopt parsing | |
| # | |
| # This script is released to the Public Domain by Chad Walstrom | |
| # Chad Walstrom <[email protected]>. | |
| ################################################################################ | |
| NOACT=0 | |
| NAME=$(basename $0|sed 's/\(\..*\)$//') | |
| VERSION="0.1" |
| Author unknown. | |
| 1.) Algorithm Complexity: You need to know Big-O. If you struggle with | |
| basic big-O complexity analysis, then you are almost guaranteed not to | |
| get hired. | |
| For more information on Algorithms you can visit: | |
| http://www.topcoder.com/tc?module=Static&d1=tutorials&d2=alg_index | |
| 2.) Coding: You should know at least one programming language really | |
| well, and it should preferably be C++ or Java. C# is OK too, since |
| #!/usr/bin/env bash | |
| set -e | |
| check_finish() { | |
| ID=$1 | |
| while ! dask-yarn status "${ID}" 2>/dev/null | awk -v col=3 '{print $col}' | grep FINISHED; do | |
| echo -e "Application ${ID} not finihsed" | |
| sleep 5 | |
| done | |
| echo -e "Application ${ID} has finished" |
| import os | |
| import dask | |
| import dask.dataframe as dd | |
| import pandas as pd | |
| import torch | |
| from dask.distributed import Client | |
| from transformers import RobertaForSequenceClassification, RobertaTokenizer | |
| from . import ClusterType, TokensDataset, get_cluster |
| from enum import Enum | |
| from dask.distributed import Client, LocalCluster, SpecCluster | |
| from dask_yarn import YarnCluster | |
| class ClusterType(Enum): | |
| YARN = 'yarn' | |
| LOCAL = 'local' |
| """Main Entrypoint to submit to the Spark Cluster""" | |
| import os | |
| from typing import Tuple | |
| import pandas as pd | |
| import torch | |
| from data_components.io.files.s3 import Client | |
| from pyspark.sql import SparkSession | |
| from pyspark.sql.functions import PandasUDFType, col, pandas_udf |
| FROM amazoncorretto:8 | |
| ENV PYSPARK_DRIVER_PYTHON python3 | |
| ENV PYSPARK_PYTHON python3 | |
| RUN yum -y update | |
| RUN yum -y groupinstall development | |
| RUN yum -y update \ | |
| && yum -y group install "Development Tools" development \ | |
| && yum -y install yum-utils which hostname python3-devel python-devel python3-pip python3-virtualenv |