Skip to content

Instantly share code, notes, and snippets.

set -ex
# method 1
# Print every date in the inclusive range [2020-01-01, 2020-01-31]:
# {0..30} yields 31 offsets, one per day of January 2020.
for i in {0..30}; do
  # GNU date: -I prints ISO-8601 (YYYY-MM-DD), -d evaluates a date expression.
  thedate=$(date -I -d "2020-01-01 +$i days")
  # printf + quoting instead of unquoted echo: safe even if the value ever
  # contained spaces, globs, or a leading '-'.
  printf '%s\n' "$thedate"
done
# method 2
@idiomer
idiomer / pyspark_hdfs_utils.py
Last active April 11, 2025 07:02
Using PySpark to perform HDFS operations such as list (ls), rename (mv), and delete (rm)
def hdfs_list(path, subtract_one=True):
    '''List the contents of an HDFS directory via the JVM Hadoop FileSystem API.

    The path is a directory by default.

    NOTE(review): this snippet lost its indentation in the paste (fixed here)
    and appears truncated -- no return statement is visible and
    ``subtract_one`` is unused in the visible lines. Recover the original tail
    before relying on the result.

    Requires a ``spark`` (SparkSession) object in scope; it is not defined in
    this snippet.
    '''
    # Hadoop FileSystem handle, obtained through the Py4J gateway of the
    # active SparkSession's Hadoop configuration.
    fs = spark._jvm.org.apache.hadoop.fs.FileSystem.get(spark._jsc.hadoopConfiguration())
    # One FileStatus per entry in the directory.
    list_status = fs.listStatus(spark._jvm.org.apache.hadoop.fs.Path(path))
    # Other useful accessors: file.getPath().getName(), file.getBlockSize(), file.getLen()
    files_size = [file.getLen() for file in list_status]
    # Sum of entry lengths in bytes, converted to mebibytes.
    total_size_in_MB = sum(files_size) / 1024.0 / 1024.0
#!/bin/bash
deactive_proxy () {
if [ ! "$1" = "nondestructive" ] ; then
unset -f deactive_proxy
if [ -n "$_OLD_HTTP_PROXY" ] ; then
http_proxy="$_OLD_HTTP_PROXY"
export http_proxy
unset _OLD_HTTP_PROXY
else