from pyspark.sql import SparkSession

# init spark
spark = SparkSession.builder \
    .master("local[*]") \
    .appName("anaconda") \
    .config("spark.sql.warehouse.dir", "file:///tmp/spark-warehouse") \
    .enableHiveSupport() \
    .getOrCreate()
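# A quick sanity check, assuming the warehouse dir above is writable; "demo_table"
# is a hypothetical table name, not something from the original snippet.
spark.sql("SHOW DATABASES").show()
spark.range(5).write.mode("overwrite").saveAsTable("demo_table")
spark.sql("SELECT COUNT(*) FROM demo_table").show()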
# Download required library
#cd /opt/conda/lib/python3.6/site-packages/pyspark-2.4.0-py3.6.egg/pyspark/jars/
#wget http://central.maven.org/maven2/org/elasticsearch/elasticsearch-spark-20_2.11/6.6.1/elasticsearch-spark-20_2.11-6.6.1.jar
#ls -l *elastic*

# Initialize Spark
from pyspark.sql import SparkSession
spark = SparkSession.builder \
    .master("local[*]") \
from scipy import spatial

# Pairwise cosine distances between all sentence embeddings
distances = spatial.distance.squareform(spatial.distance.pdist(message_embeddings, 'cosine'))

def progress(i):
    # Simple in-place spinner so long clustering runs show activity
    print('\r{} {}'.format('-\\|/'[i % 4], i), end='')

def cluster(items, distances, similarity_threshold=0.11):
    print('Clustering threshold:', similarity_threshold)
    clusters = list()
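    # The gist preview ends here; the loop below is one plausible greedy completion,
    # not necessarily the author's: clusters hold item indices, and each item joins the
    # first cluster whose first member is within the distance threshold, else starts a new one.
    for i in range(len(items)):
        progress(i)
        for c in clusters:
            if distances[i][c[0]] <= similarity_threshold:
                c.append(i)
                break
        else:
            clusters.append([i])
    print()
    return clusters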
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import os
import pandas as pd
from scipy import spatial
from operator import itemgetter

#module_url = "https://tfhub.dev/google/universal-sentence-encoder/2"
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
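# A minimal usage sketch for the TF1-style hub module above; the sample messages are
# placeholders, not data from the original gist.
messages = ["How are you?", "What is your age?"]
embed = hub.Module(module_url)
with tf.Session() as session:
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    message_embeddings = session.run(embed(messages))
print(np.array(message_embeddings).shape)  # (2, 512) for the universal sentence encoder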
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat

def wcsv_to_df(
    fileName: String,
    tableName: String,
    columns: Array[String],
    fieldTerminator: String,
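    // The gist preview stops mid-signature; the remaining parameter and the body below
    // are a hedged reconstruction of a "weird CSV" loader that uses a custom record
    // delimiter via newAPIHadoopFile and registers the result as a temp view.
    // It assumes a SparkSession named `spark` is in scope.
    recordTerminator: String = "\n"): org.apache.spark.sql.DataFrame = {
  import org.apache.spark.sql.Row
  import org.apache.spark.sql.types.{StringType, StructField, StructType}

  val conf = new Configuration(spark.sparkContext.hadoopConfiguration)
  conf.set("textinputformat.record.delimiter", recordTerminator)

  val lines = spark.sparkContext
    .newAPIHadoopFile(fileName, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], conf)
    .map { case (_, text) => text.toString }

  // Note: String.split treats fieldTerminator as a regex
  val schema = StructType(columns.map(c => StructField(c, StringType, nullable = true)))
  val rows = lines.filter(_.nonEmpty).map(l => Row.fromSeq(l.split(fieldTerminator, -1).toSeq))

  val df = spark.createDataFrame(rows, schema)
  df.createOrReplaceTempView(tableName)
  df
}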
-- https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/PostgreSQL.Procedural.Importing.html#USER_PostgreSQL.S3Import.table_import_from_s3
-- https://github.com/chimpler/postgres-aws-s3
CREATE EXTENSION aws_s3 CASCADE;

DROP TABLE IF EXISTS nyse;
CREATE TABLE nyse (
  exchange VARCHAR(50),
  stock_symbol VARCHAR(50),
  stock_date DATE,
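  -- The gist preview ends mid-definition; the remaining columns and the import call
  -- below are a hedged reconstruction based on the classic NYSE daily-prices dataset
  -- and the aws_s3 docs linked above. Bucket, key and region are placeholders.
  stock_price_open NUMERIC,
  stock_price_high NUMERIC,
  stock_price_low NUMERIC,
  stock_price_close NUMERIC,
  stock_volume BIGINT,
  stock_price_adj_close NUMERIC
);

SELECT aws_s3.table_import_from_s3(
  'nyse', '', '(FORMAT csv, DELIMITER E''\t'')',
  aws_commons.create_s3_uri('my-bucket', 'nyse/NYSE-2000-2001.tsv', 'eu-west-1')
);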
import boto3
import pandas as pd

s3 = boto3.client('s3', region_name='eu-west-1')

def execute_query(query):
    response = s3.select_object_content(
        Bucket='my-bucket',
        Key='nyse/NYSE-2000-2001.tsv.gz',
        ExpressionType='SQL',
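        Expression=query,
        InputSerialization={'CSV': {'FileHeaderInfo': 'USE', 'FieldDelimiter': '\t'},
                            'CompressionType': 'GZIP'},
        OutputSerialization={'CSV': {}},
    )
    # The gist preview stops at ExpressionType; the serialization arguments above and the
    # event-stream handling below are a hedged reconstruction of a typical S3 Select call
    # ('USE' assumes the TSV has a header row).
    records = []
    for event in response['Payload']:
        if 'Records' in event:
            records.append(event['Records']['Payload'].decode('utf-8'))
    return ''.join(records)

# Hypothetical usage; column names depend on the actual header row of the file:
# print(execute_query("SELECT s.stock_symbol, s.stock_price_close FROM S3Object s LIMIT 10"))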
// CloudWatch Logs console: every 3 s, click the "load more" element at the bottom
// of the log table and scroll the viewer to the end of the page
setInterval(function(){
    document.getElementsByClassName('cwdb-log-viewer-table-infinite-loader-bottom')[0].lastElementChild.click();
    document.getElementsByClassName('GIYU-ANBFDF')[0].scroll(0, document.body.scrollHeight);
}, 3000);

// EMR console: every 5 s, click a console control (the class names are obfuscated
// and change between console releases, so the indexes may need adjusting)
setInterval(function(){
    document.getElementsByClassName('GAEMCWHGM')[14].click();
}, 5000);
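// Either loop can be stopped from the console by keeping the interval id:
//   var timer = setInterval(...); clearInterval(timer);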
def get_spark(app_name):
    """
    Creates a Spark session with default parameters
    """
    spark = SparkSession.builder \
        .master(os.environ.get("SPARK_MASTER", "local[*]")) \
        .appName(app_name) \
        .config("spark.default.parallelism", 16) \
        .config("spark.sql.adaptive.enabled", True) \
        .config("spark.sql.warehouse.dir", SPARK_WAREHOUSE) \
server {
    listen 80;
    server_name localhost;

    auth_basic "Restricted Access";
    auth_basic_user_file /etc/nginx/htpasswd.users;

    location / {
        proxy_pass https://vpc-my-es-574vcxyz.eu-central-1.es.amazonaws.com/;
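        # The gist preview ends here; the directives and closing braces below are a hedged
        # completion for fronting an Amazon Elasticsearch Service VPC endpoint with basic auth.
        proxy_set_header Host $proxy_host;
        # Don't forward the basic-auth credentials to the Elasticsearch endpoint
        proxy_set_header Authorization "";
    }
}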