This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/PostgreSQL.Procedural.Importing.html#USER_PostgreSQL.S3Import.table_import_from_s3 | |
-- https://github.com/chimpler/postgres-aws-s3 | |
-- Install the aws_s3 extension (CASCADE also installs the extensions it
-- depends on). IF NOT EXISTS keeps the script re-runnable.
CREATE EXTENSION IF NOT EXISTS aws_s3 CASCADE;

-- Remove any previous copy of the table before it is re-created below.
-- IF EXISTS avoids an error on the first run, when there is nothing to drop.
DROP TABLE IF EXISTS nyse;
CREATE TABLE nyse ( | |
exchange VARCHAR(50), | |
stock_symbol VARCHAR(50), | |
stock_date DATE, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.hadoop.conf.Configuration | |
import org.apache.hadoop.mapreduce.Job | |
import org.apache.hadoop.io.{LongWritable, Text} | |
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat | |
def wcsv_to_df( | |
fileName: String, | |
tableName: String, | |
columns: Array[String], | |
fieldTerminator: String, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import tensorflow_hub as hub | |
import numpy as np | |
import os | |
import pandas as pd | |
from scipy import spatial | |
from operator import itemgetter | |
#module_url = "https://tfhub.dev/google/universal-sentence-encoder/2" | |
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy import spatial | |
distances = spatial.distance.squareform(spatial.distance.pdist(message_embeddings, 'cosine')) | |
def progress(i):
    """Print a one-line spinner frame followed by the counter *i*.

    The leading carriage return moves the cursor back to the start of the
    line, so successive calls overwrite each other instead of scrolling.

    Args:
        i: Iteration counter; ``i % 4`` selects the spinner glyph and the
            value itself is printed after it.
    """
    # The backslash is escaped explicitly: '\|' is an invalid escape sequence
    # that newer Python versions warn about. The string value is unchanged.
    spinner = '-\\|/'
    # No trailing newline is written, so flush explicitly to make the frame
    # visible immediately on line-buffered output.
    print('\r{} {}'.format(spinner[i % 4], i), end='', flush=True)
def cluster(items, distances, similarity_threshold=0.11): | |
print('Clustering threshold:', similarity_threshold) | |
clusters = list() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download required library | |
#cd /opt/conda/lib/python3.6/site-packages/pyspark-2.4.0-py3.6.egg/pyspark/jars/ | |
#wget https://repo1.maven.org/maven2/org/elasticsearch/elasticsearch-spark-20_2.11/6.6.1/elasticsearch-spark-20_2.11-6.6.1.jar
#ls -l *elastic* | |
# Initialize Spark | |
from pyspark.sql import SparkSession | |
spark = SparkSession.builder \ | |
.master("local[*]") \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SparkSession | |
# Create (or reuse) the Spark session: "local[*]" master, Hive support
# enabled, warehouse directory under /tmp.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("anaconda")
    .config("spark.sql.warehouse.dir", "file:///tmp/spark-warehouse")
    .enableHiveSupport()
    .getOrCreate()
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Update packages
sudo yum update -y

# Mount EBS volume
# Create the filesystem only when the device carries no recognisable
# signature yet, so re-running the script does not fail at mkfs.
if ! sudo blkid /dev/xvdb >/dev/null 2>&1; then
    sudo mkfs -t xfs /dev/xvdb
fi
# -p: do not fail when the mount point already exists.
sudo mkdir -p /data
# Skip the mount when /data is already a mount point.
if ! mountpoint -q /data; then
    sudo mount /dev/xvdb /data
fi
# Install MongoDB | |
echo ' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires JSON as the output format and the "jq" command-line tool
# If the task runs successfully, exits 0
# Launch the task and capture the CLI's JSON response.
# All expansions are quoted so values containing spaces or glob characters
# are passed through as single, unmodified arguments.
run_result=$(aws ecs run-task \
    --cluster "${CLUSTER}" \
    --task-definition "${TASK_DEFINITION}" \
    --launch-type EC2 \
    --overrides "${OVERRIDES}")
# Quoted so the JSON is printed verbatim (no word splitting or globbing).
echo "${run_result}"
# ARN of the first task in the response.
container_arn=$(echo "${run_result}" | jq -r '.tasks[0].taskArn')
aws ecs wait tasks-stopped \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def json_hive_def(path):
    """Return the ``CREATE TABLE`` statement matching the JSON data at *path*.

    The JSON is read with the module-level ``spark`` session and registered
    as the temp view ``temp_view``. An empty table ``temp_table`` is created
    from that view only so that ``SHOW CREATE TABLE`` can render its DDL; the
    table is dropped again before the function returns or raises.

    Args:
        path: Location of the JSON data, passed to ``spark.read.json``.

    Returns:
        The ``createtab_stmt`` value of the first result row, with newline
        characters removed.
    """
    spark.read.json(path).createOrReplaceTempView("temp_view")
    # LIMIT 0 selects no rows, so only the schema is copied.
    spark.sql("CREATE TABLE temp_table AS SELECT * FROM temp_view LIMIT 0")
    try:
        ddl_row = spark.sql("SHOW CREATE TABLE temp_table").take(1)[0]
        return ddl_row.createtab_stmt.replace('\n', '')
    finally:
        # Drop the scratch table even when rendering the DDL fails; a
        # leftover table would make the next call's CREATE TABLE fail.
        spark.sql("DROP TABLE temp_table")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): when this is run as a script rather than pasted into a
# terminal, `sudo su` opens an interactive root shell and the lines below do
# not run inside it. Confirm the intended usage.
sudo su
apt-get install -y libyaml-dev python-dev python3-dev python3-pip
pip3 install awscli-cwlogs
# Link the aws CLI under /var/awslogs/bin unless that directory already exists.
if [ ! -d /var/awslogs/bin ] ; then
    mkdir -p /var/awslogs/bin
    ln -s /usr/local/bin/aws /var/awslogs/bin/aws
fi
# mkdir -p keeps this step re-runnable, like the guarded block above.
# The && chain prevents downloading into the wrong directory when mkdir or cd
# fails. curl -f fails on HTTP errors instead of saving the error page as the
# setup script.
mkdir -p /opt/awslogs &&
    cd /opt/awslogs &&
    curl -f https://s3.amazonaws.com/aws-cloudwatch/downloads/latest/awslogs-agent-setup.py -O