This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################## | |
## Native hdfs access (only on the cluster) | |
# conda install -c conda-forge libhdfs3=2.3.0=1 hdfs3 --yes | |
import hdfs3 | |
import pandas as pd | |
# NameNode host and IPC port used to open the native HDFS connection.
nameNodeHost = 'hadoopnn1.localdomain'
nameNodeIPCPort = 8020
# Module-level filesystem handle built from the two settings above.
hdfs = hdfs3.HDFileSystem(nameNodeHost, port=nameNodeIPCPort)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## pip install python-gitlab gitpython | |
import gitlab # python-gitlab | |
from git import Repo # gitpython | |
import os, time | |
########################## | |
### Python Gitlab Config: ~/.python-gitlab.cfg | |
# [global] | |
# default = GitLab | |
# ssl_verify = true |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import pandas | |
import sys | |
import argparse | |
import string | |
import subprocess | |
import json | |
import textwrap | |
import re |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyhive import hive | |
import pandas as pd | |
from vdom import pre | |
# Nteract Data Explorer
# Publish DataFrames with a table schema so the Data Explorer can render them.
pd.options.display.html.table_schema = True
# No row limit: every row is sent to the front end (careful with large frames!).
pd.options.display.max_rows = None
def getHiveConn(host, username, port=10000, schema="db_user1"):
    """Open a Hive connection through ``pyhive.hive.connect``.

    Args:
        host: Host name passed to ``hive.connect``.
        username: User name passed to ``hive.connect``.
        port: Port passed to ``hive.connect`` (defaults to 10000).
        schema: Database to connect to (defaults to ``"db_user1"``).

    Returns:
        Whatever ``hive.connect`` returns for these settings; ``auth`` is
        always passed as ``None``.
    """
    return hive.connect(
        host=host,
        port=port,
        username=username,
        database=schema,
        auth=None,
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get the requested resources for all pods by container
# Output: one line per pod name, followed by one tab-indented line per
# container with its CPU and memory requests.
# NOTE: the former `-a` (--show-all) shorthand was dropped here; it is not
# accepted by current kubectl releases - confirm against your kubectl version.
kubectl get pods -o jsonpath='{range .items[*]}{@.metadata.name}{"\n"}{range @.spec.containers[*]}{"\t"}{@.name}{" cpu:"}{@.resources.requests.cpu}{" mem:"}{@.resources.requests.memory}{"\n"}{end}{end}'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Concatenate the Root and SubCA certs from DigiCert
# to get the full certification chain
cat DigiCert_Global_Root_CA.pem DigiCertSHA2SecureServerCA.pem > DigiCertCA_Chain.pem
# Generate a new key store from the signed cert, the private key
# and the CA chain built above.
# Every option line except the last must end with a backslash so that the
# whole invocation is parsed as a single openssl command.
openssl pkcs12 -export \
  -in my_cert_signed.crt \
  -inkey my_cert_key.pem \
  -chain -CAfile DigiCertCA_Chain.pem \
  -name "my_cert" -out my_cert.keystore.p12
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unzip in-place (i.e. in the folder containing the file and not the current one)
# The archive path is handed to the inner shell as "$1" instead of being
# spliced into the script text, and names are NUL-delimited, so paths that
# contain spaces, quotes or other shell metacharacters are handled safely.
find . -type f -name "*.zip" -print0 | xargs -0 -P4 -I{} sh -c 'unzip -o -d "$(dirname "$1")" "$1" && rm "$1"' _ {}
# Gzip all CSV extracted from the ZIP files
# The pattern is quoted so that find (not the calling shell) expands it;
# unquoted, *.csv would be replaced by matching names in the current directory.
find . -type f -name "*.csv" -print0 | xargs -0 -n1 -P4 gzip
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import pyarrow.orc as orc | |
# Read two columns of an ORC data file into a pandas DataFrame.
# The file is opened in a `with` block so the handle is closed as soon as the
# columns have been loaded; `data0` must not be read from after this point.
with open('/hive/warehouse/000000_0', 'rb') as file0:
    data0 = orc.ORCFile(file0)
    df0 = data0.read(columns=['_col10', '_col50']).to_pandas()
# Summary statistics of the loaded columns (displayed when run in a notebook).
df0.describe()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get the Hive Symlinks
# Replace <BUCKET> with the bucket name before running. The fourth column of
# the `aws s3 ls` listing is printed with the s3://<BUCKET>/ prefix prepended.
aws s3 ls s3://<BUCKET>/hive/ --recursive | awk '{print "s3://<BUCKET>/"$4}'
# Delete all data files for the selected Hive partition
# Replace <BUCKET> with the bucket name before running.
# Assumes symlink.txt lists one S3 object URL per line - verify before use.
# Each line is passed to `aws s3 rm` as a single argument; no intermediate
# `sh -c` is used, so the line content is never re-parsed as shell code.
aws s3 cp s3://<BUCKET>/hive/dt=2019-07-24-00-00/symlink.txt - | xargs -I {} aws s3 rm {}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import snowflake.connector | |
# Open a Snowflake connection. User, password, account and warehouse are read
# from the SNOWSQL_* environment variables; os.getenv yields None for any
# variable that is not set.
ctx = snowflake.connector.connect(
    authenticator="snowflake",
    user=os.getenv("SNOWSQL_USER"),
    password=os.getenv("SNOWSQL_PWD"),
    account=os.getenv("SNOWSQL_ACCOUNT"),
    warehouse=os.getenv("SNOWSQL_WAREHOUSE"),
)
OlderNewer