ls -lah
In vim: Esc, then :set number to show line numbers
git checkout <branch_name> -- <paths>
git ls-files --deleted -z | xargs -0 git rm
git pull --no-ff
rename a branch [https://gist.github.com/lttlrck/9628955]
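A minimal sketch of the usual rename flow (branch names are placeholders; the linked gist covers the same steps):
git branch -m <old_name> <new_name>        # rename the local branch
git push origin -u <new_name>              # push the renamed branch and set its upstream
git push origin --delete <old_name>        # delete the old branch on the remote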
check out files from another repo and maintain git history [https://blog.billyc.io/how-to-copy-one-or-more-files-from-one-git-repo-to-another-and-keep-the-git-history/]
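One common approach, sketched here with placeholder paths (the linked post may use a different method): export the history of the wanted paths as email-formatted patches in the source repo, then replay them in the destination repo.
# in the source repo
git log --pretty=email --patch-with-stat --reverse --full-index --binary -- <path/to/file> > /tmp/file_history.patch
# in the destination repo
git am < /tmp/file_history.patch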
s5cmd du --humanize '<fullpath>/*'
from fastparquet import ParquetFile
pf = ParquetFile('file.parquet')
print(pf.schema)
poetry shell
# if that doesn't work, you can do:
poetry env list --full-path
source {full path}/bin/activate
or as a one-liner: source "$( poetry env list --full-path )/bin/activate"
jq --color-output . <filename>
mkdir -p my_env && tar -xzf my_env.tar.gz -C my_env  # -C requires the target directory to exist
zip -r <targetfilename.zip> <folder> (recursive) or zip <targetfilename.zip> <space separated list of filenames>
unzip <targetfilename.zip> -d <target directory>
import zipfile
with zipfile.ZipFile(<zipfilename.zip>, 'r') as f:
    f.extractall("<target directory name>")
import shutil
shutil.make_archive(<target name>, 'zip', <source directory name>)
aws s3 cp s3://data/ . --recursive --exclude "*" --include "myfilterhere"
docker rm $(docker ps --filter status=exited -q)
docker image prune -a
cat /etc/hostname
docker cp <local path> <hostname>:<container path>   # copy a file into the container (hostname/ID from above)
docker cp <hostname>:<container path> <local path>   # copy a file out of the container
import logging
logging.basicConfig(format="%(asctime)s %(levelname)s:%(name)s:%(message)s", filename=".log", level=logging.INFO)
logger = logging.getLogger(__name__)  # basicConfig() returns None, so get a logger separately
python -m venv /path/to/new/virtual/environment
source <path>/bin/activate
from pathlib import Path
pathobj = Path(f"{foldername}/{filename}")
pathobj.touch(exist_ok=True)
import itertools
flatten1 = itertools.chain.from_iterable
list(flatten1(<your nested data>))  # from_iterable returns an iterator, so wrap in list() if you need one
import json
with open('json_list.json', 'w') as f:
    for line in dict_list:
        f.write(json.dumps(line))
        f.write('\n')
with open('json_list.json', 'r') as f:
    json_objects = [json.loads(line) for line in f]
python -c "import sys;
sys.path = sys.path[1:];
import django;
print(django.__path__)"
with open(sys.argv[1], 'r') as f:
import operator
sorted_x = sorted(x.items(), key=operator.itemgetter(1))
python -m pip install -e <local absolute path>
os.environ['API_USER'] = 'username'
os.environ['API_PASSWORD'] = 'secret'
USER = os.getenv('API_USER')
PASSWORD = os.environ.get('API_PASSWORD')
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 800)
df.isna().sum()
data.groupby(data['date'].map(lambda x: x.year))
print("%.4g"%(long number))
df.sample(1000, replace=False)
ln -s /path_to_script/myscript /usr/bin/myscript
Or, assuming the script is in a folder, e.g. ~/bin, add that folder to PATH in your .bashrc: export PATH=$PATH:~/bin
cat -n rawfile > file_with_numbered_lines
tar -zxvf <filename>
DESCRIPTION: -z: uncompress the archive with gzip. -x: extract files from the archive. -v: produce verbose output. -f: read the archive from the specified file.
Ctrl-Z to suspend, then bg to keep it running in the background; to bring it back, fg %<jobnumber> (list job numbers with jobs)
ls | wc -l
from IPython.display import display, HTML
display(HTML("<style>:root { --jp-notebook-max-width: 100% !important; }</style>"))
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 1200)
import warnings; warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 1
%aimport <module name>
(one magic per line; they can't be chained with semicolons)
- Convert to datetime
- df.set_index(datetime_col)
- unstack() to get 1 column per feature, and fillna(0) (full sketch after the plotting loop below)
for col in plottable.columns:
    plt.plot(plottable.index, plottable[col], label=col)
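A minimal end-to-end sketch of the steps above, assuming a long-format DataFrame df with hypothetical 'date', 'feature', and 'value' columns:
import pandas as pd
import matplotlib.pyplot as plt

df['date'] = pd.to_datetime(df['date'])   # convert to datetime
df = df.set_index('date')                 # datetime index
plottable = (df.groupby([df.index, 'feature'])['value']
               .sum()
               .unstack()                 # 1 column per feature
               .fillna(0))
for col in plottable.columns:
    plt.plot(plottable.index, plottable[col], label=col)
plt.legend()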
fig = plt.gcf(); fig.savefig('test2png.pdf', dpi=300)
axes = plt.gca(); axes.set_xticklabels(listofstr, rotation=90, fontsize=22)
plt.rcParams["figure.figsize"] = (20,3)
or
fig = plt.gcf(); fig.set_size_inches(18.5, 10.5, forward=True)
index.values.astype('M8[D]')
l = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
sklearn.model_selection.train_test_split(*arrays, test_size=None, train_size=None, random_state=None, shuffle=True, stratify=None)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=30, train_size=70, random_state=42, shuffle=True)
sklearn.preprocessing.StandardScaler(*, copy=True, with_mean=True, with_std=True)
scaler = StandardScaler()
scaler.fit(data)
print(scaler.mean_)  # e.g. [0.5 0.5]
scaled = scaler.transform(data)