This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pathlib

# Collect every image file below ./images (searched recursively).
image_dir = pathlib.Path('images').resolve()
exts = ['.jpg', '.png']
# The suffix is lower-cased before the lookup, so '.JPG' or '.Png' match too.
image_paths = [path for path in image_dir.rglob('*') if path.suffix.lower() in exts]

# Same files, rebound as POSIX-style strings that keep only the parent
# directory and the file name (the last two path components).
# The list built above is reused so the directory tree is walked only once.
image_paths = [pathlib.Path(*path.parts[-2:]).as_posix() for path in image_paths]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reset the seaborn settings once they have been set.
# This can be used in the middle of a notebook.
# Reference: "python seaborn to reset back to the matplotlib" - Stack Overflow
# https://stackoverflow.com/questions/26899310/python-seaborn-to-reset-back-to-the-matplotlib
# Either of the following may be used.

# in matplotlib
import matplotlib as mpl

# Overwrite the current rcParams with Matplotlib's built-in defaults.
mpl.rcParams.update(mpl.rcParamsDefault)

# in seaborn
# NOTE(review): the seaborn variant is cut off in this excerpt.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from io import StringIO


def read_copytext(text):
    """Parse pasted text into a one-column frame with comma-separated fields.

    The text is read with ``pd.read_table`` (tab-delimited, first line used
    as the header), the single column is renamed to ``"col1"``, and every
    run of whitespace inside each value is replaced by a comma.

    Args:
        text: The copied text; the first line is consumed as the header row.

    Returns:
        The resulting ``DataFrame`` with one string column named ``"col1"``.

    Raises:
        ValueError: If the text parses into more than one column (for
            example because it contains tab characters), since exactly one
            column name is assigned.
    """
    buffer = StringIO(text)
    df = pd.read_table(buffer)
    df.columns = ["col1"]
    # Raw string + regex=True: without them pandas >= 2.0 treats the pattern
    # as a literal and nothing is replaced.
    df["col1"] = df["col1"].str.replace(r"\s+", ",", regex=True)
    # NOTE(review): the visible excerpt ends at the replacement above; the
    # frame is returned so the work is not discarded — confirm against the
    # complete snippet.
    return df
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
import numpy as np | |
import pandas as pd | |
from sklearn.model_selection import KFold, StratifiedKFold | |
# Toy binary-classification data: 10 samples with 2 random features each,
# and balanced labels (five 1s followed by five 0s).
X_train = np.random.random((10, 2))
y_train = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
# NOTE(review): presumably the name of the prediction column; the code that
# uses it is not part of this excerpt.
column = "pred"
# NOTE(review): presumably the number of cross-validation folds for the
# imported KFold / StratifiedKFold; usage is not shown here.
n_fold = 5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import chain

from nltk.corpus import wordnet

# All WordNet synsets for the word 'change'.
synonyms = wordnet.synsets('change')
# Flatten the lemma names of every synset into one de-duplicated set.
lemmas = set(chain.from_iterable(synset.lemma_names() for synset in synonyms))
lemmas
# Out[31]:
# {'alter',
# 'alteration',
# 'change',
# ... (output truncated in this excerpt)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import scipy as sp
# Load the submodule explicitly: a bare "import scipy" does not guarantee
# that sp.sparse is available on older SciPy releases.
import scipy.sparse
import pandas as pd

df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
df2 = pd.DataFrame({"C": [5, 6]})

# Build a CSR sparse matrix from df1's values, then expand it back to dense.
X1 = sp.sparse.csr_matrix(df1.values)
X1_dense = X1.todense()
# Out[28]:
# matrix([[1, 3],
#         [2, 4]])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The alias must be "np": the code below refers to np, so the original
# "import numpy as pd" raised NameError.
import numpy as np

# Add 1 to every element of 1..5, in plain Python and with NumPy.

# Python
list(map(lambda x: x + 1, range(1, 6, 1)))
# Out[1]: [2, 3, 4, 5, 6]

# Numpy
# The addition is vectorized; .tolist() converts back to plain Python ints
# so the result matches the output shown below.
(np.arange(1, 6, 1) + 1).tolist()
# Out[2]: [2, 3, 4, 5, 6]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import OrderedDict

d = {
    'A': 3,
    'B': 2,
    'C': 1,
}

# Values ordered by key: sort the (key, value) pairs on the key.
OrderedDict(sorted(d.items(), key=lambda x: x[0])).values()
# Out[1]: odict_values([3, 2, 1])

# Values ordered by value: sort the (key, value) pairs on the value.
OrderedDict(sorted(d.items(), key=lambda x: x[1])).values()
# Out[2]: odict_values([1, 2, 3])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

X_str = np.array([['a', 'dog', 'red'], ['b', 'cat', 'green']])

# transform to integer
# The array is flattened first, so one shared label mapping covers all
# columns; the codes are then put back into the original 2-D shape.
X_int = LabelEncoder().fit_transform(X_str.ravel()).reshape(X_str.shape)

# transform to binary
# Each integer column is one-hot encoded separately; toarray() densifies
# the sparse result.
X_bin = OneHotEncoder().fit_transform(X_int).toarray()
print(X_bin)
# [[ 1. 0. 0. 1. 0. 1.]
# ... (output truncated in this excerpt)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import TfidfVectorizer

text = [
    'This is a string',
    'This is another string',
    'TFIDF computation calculation',
    'TfIDF is the product of TF and IDF',
]

# English stop words are dropped; norm=None leaves the TF-IDF rows
# un-normalized.
vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)
X = vectorizer.fit_transform(text)

# get_feature_names() was removed in scikit-learn 1.2; its replacement
# get_feature_names_out() returns an ndarray, so convert it back to a list.
X_vovab = vectorizer.get_feature_names_out().tolist()
# Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']

# Dense view of the document-term matrix.
X_mat = X.todense()
# Out[2]:
# ... (output truncated in this excerpt)