Skip to content

Instantly share code, notes, and snippets.

View ragingbal's full-sized avatar

Balaji Bal ragingbal

View GitHub Profile
# Delete pods in the "default" namespace whose listing line matches "web-app":
# list pods without the header row, let awk print the first column (the pod
# name) of every matching line, and hand those names to `kubectl delete pod`.
kubectl get pods -n default --no-headers=true | awk '/web-app/{print $1}'| xargs kubectl delete -n default pod
from hdfs import Config
# Build an HDFS client for the 'dev' alias.
client = Config().get_client('dev')

# List the entries at the HDFS root and show them.
fnames = client.list('/')
print(fnames)

# Upload the local CSV file to /data/download.csv on HDFS.
client.upload(
    hdfs_path='/data/download.csv',
    local_path='download.csv',
    n_threads=1,
    temp_dir=None,
    chunk_size=65536,
    progress=None,
    cleanup=True,
)
-- Drop the database named database_name; CASCADE asks for its contents to be
-- dropped along with it.
DROP DATABASE database_name CASCADE;
# Write the table names of <dbname> to tables.txt (and echo them to the terminal).
# Replace <dbname> with the real database name before running.
hive -e 'show tables in <dbname>' | tee tables.txt
Then create a bash script (describe_tables.sh) that loops over each table name in that list; it reads the names from standard input, so run it as `./describe_tables.sh < tables.txt`:
# Describe every Hive table whose name arrives on standard input, one name
# per line. Replace <dbname> with the real database name before running.
#
# -r keeps backslashes literal; the `|| [ -n "$line" ]` clause still processes
# a final line that has no trailing newline.
while read -r line || [ -n "$line" ]
do
    # A blank line would produce the malformed statement "describe <dbname>."
    [ -n "$line" ] || continue
    echo "$line"
    # Pass the statement as a single argument instead of going through eval,
    # so quotes or shell metacharacters in a line are never executed.
    hive -e "describe <dbname>.$line"
done
# Open an LDAP-authenticated Hive connection.
# NOTE(review): host, port, username, schema and password are not defined in
# this snippet; they are assumed to be set earlier by the caller.
conn = hive.connect(
    host=host,
    port=port,
    username=username,
    database=schema,
    password=password,
    auth='LDAP',
)
query = "select * from test_bal.abc limit 10"

cur = conn.cursor()
try:
    cur.execute(query)
    # Keep the fetched rows so they can be used after the cursor is closed.
    rows = cur.fetchall()
finally:
    # Close the cursor even when execute()/fetchall() raises.
    cur.close()
from pyhive import hive
def getHiveConn(host, username, port=10000, schema="db_user1"):
    """Return a new PyHive connection.

    Args:
        host: Host name passed to ``hive.connect``.
        username: User name passed to ``hive.connect``.
        port: Port to connect to (defaults to 10000).
        schema: Database selected for the connection (defaults to
            ``"db_user1"``).

    Returns:
        The object returned by ``hive.connect`` called with ``auth=None``.
    """
    return hive.connect(
        host=host,
        port=port,
        username=username,
        database=schema,
        auth=None,
    )
def getHiveData(table, conn=None):
    """Read every row of ``table`` into a pandas DataFrame.

    Args:
        table: Name of the table to select from. It is interpolated directly
            into the SQL text, so it must come from a trusted source.
        conn: An open DB-API connection. When ``None``, a new connection is
            created with ``getHiveConn("localhost", "user1")``.

    Returns:
        The DataFrame produced by ``pandas.read_sql`` for
        ``SELECT * FROM <table>``.
    """
    # Local import: no pandas import is visible alongside this function.
    import pandas as pd

    if conn is None:
        conn = getHiveConn("localhost", "user1")

    # NOTE(review): identifiers cannot be bound as query parameters, so the
    # table name is formatted into the statement; never pass untrusted input.
    dfRaw = pd.read_sql(f"SELECT * FROM {table}", conn)
    return dfRaw
# Configuration file for jupyter-notebook.
#------------------------------------------------------------------------------
# Application(SingletonConfigurable) configuration
#------------------------------------------------------------------------------
## This is an application.
## The date format used by logging formatters for %(asctime)s
#c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S'
# Configuration file for jupyter-notebook.
#------------------------------------------------------------------------------
# Application(SingletonConfigurable) configuration
#------------------------------------------------------------------------------
## This is an application.
## The date format used by logging formatters for %(asctime)s
#c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S'
import json
import jinja2
# Load the JSON schema file into `data`.
with open('Schema_CRM_ACCOUNTCONTRACT.json') as f:
    data = json.load(f)

# Resolve templates relative to the current working directory.
templateLoader = jinja2.FileSystemLoader(searchpath="./")
templateEnv = jinja2.Environment(loader=templateLoader)
TEMPLATE_FILE = "create_table.jinja"
# Print the file `env` with each KEY=VALUE line rewritten as `"KEY": "VALUE",`
# (split at the first `=`); lines without `=` pass through unchanged.
# NOTE(review): `[^=]*` is greedy, so spaces before `=` stay inside the
# captured key; `\s` is a GNU sed extension.
sed -e 's/^\([^=]*\)\s*=\s*\(.*\)/"\1": "\2",/' env