Keiichi Kuroyanagi Keiku

🐢

Slowly but surely.

Artificial Intelligence Engineer / Lightning AI Ambassador

Keiku / get_image_paths.py

Created June 18, 2020 03:17

Get image paths.

	import pathlib


	def get_image_paths(image_dir, exts=('.jpg', '.png'), include_parent=False):
	    """Recursively collect the image files found under *image_dir*.

	    Args:
	        image_dir: Directory to search (str or path-like); it is resolved
	            to an absolute path before searching.
	        exts: File suffixes to accept, including the leading dot. The
	            comparison is case-insensitive.
	        include_parent: When False, return absolute ``pathlib.Path``
	            objects. When True, return POSIX-style strings made of each
	            file's immediate parent directory name and its file name
	            (e.g. ``'cats/a.jpg'``).

	    Returns:
	        A list of matching paths, in the order ``rglob`` yields them.
	    """
	    root = pathlib.Path(image_dir).resolve()
	    allowed = {ext.lower() for ext in exts}
	    # '*' is required here: an empty rglob pattern does not enumerate files.
	    matches = [path for path in root.rglob('*') if path.suffix.lower() in allowed]
	    if not include_parent:
	        return matches
	    # Path() takes the parts as separate arguments, so the slice must be unpacked.
	    return [pathlib.Path(*path.parts[-2:]).as_posix() for path in matches]


	# get image paths list in a directory
	image_dir = pathlib.Path('images').resolve()
	exts = ['.jpg', '.png']
	image_paths = get_image_paths(image_dir, exts)

	# include parent directory
	image_paths = get_image_paths(image_dir, exts, include_parent=True)

Keiku / reset_seaborn_settings.py

Created June 9, 2020 03:50

Reset seaborn settings after they have been applied.

	# Reset the plotting settings after seaborn has changed them. This can be run in the middle of a notebook.
	# Reference: python seaborn to reset back to the matplotlib - Stack Overflow https://stackoverflow.com/questions/26899310/python-seaborn-to-reset-back-to-the-matplotlib

	# Either of the following approaches may be used.

	# With matplotlib:
	import matplotlib as mpl
	# Overwrite the current rcParams with the values held in mpl.rcParamsDefault.
	mpl.rcParams.update(mpl.rcParamsDefault)

	# in seaborn

Keiku / read_copytext.py

Created January 19, 2018 10:25

Read copied text into a pandas DataFrame.

Keiku / split_KFold.py

Last active May 2, 2017 07:10

Split K-fold validation dataset.

	import string
	import numpy as np
	import pandas as pd
	from sklearn.model_selection import KFold, StratifiedKFold

	# Toy training data: 10 samples with 2 uniformly random features each.
	X_train = np.random.random(size=(10, 2))
	# Binary labels: five 1s followed by five 0s.
	y_train = np.repeat([1, 0], 5)

	# Settings: a column name and the number of folds.
	column, n_fold = "pred", 5

Keiku / get_wordnet_synonyms.py

Created April 28, 2017 07:04

Extract synonyms using WordNet.

	from itertools import chain
	from nltk.corpus import wordnet

	# Look up every WordNet synset for the word 'change'.
	synonyms = wordnet.synsets('change')
	# Flatten the lemma names of all synsets into one set of distinct names.
	lemmas = set(chain.from_iterable(synset.lemma_names() for synset in synonyms))
	lemmas
	# Out[31]:
	# {'alter',
	# 'alteration',
	# 'change',

Keiku / stack_sparse_matrix.py

Created April 28, 2017 02:18

Stack the sparse matrices.

	import numpy as np
	import pandas as pd
	import scipy as sp
	import scipy.sparse  # a bare `import scipy` does not guarantee that sp.sparse is loaded

	# Two small frames with the same number of rows.
	df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
	df2 = pd.DataFrame({"C": [5, 6]})

	# Convert df1's values to a CSR sparse matrix, then back to dense form.
	X1 = sp.sparse.csr_matrix(df1.values)
	X1_dense = X1.todense()
	# Out[28]:

Keiku / list_operations.py

Created April 18, 2017 07:43

List operations.

Keiku / OrderedDict_sample.py

Last active April 13, 2017 03:35

Get keys/values from sorted OrderedDict.

	from collections import OrderedDict

	# Sample mapping used by the two examples below.
	d = {'A': 3, 'B': 2, 'C': 1}

	# Values ordered by key.
	OrderedDict(sorted(d.items(), key=lambda pair: pair[0])).values()
	# Out[1]: odict_values([3, 2, 1])
	# Values ordered by value.
	OrderedDict(sorted(d.items(), key=lambda pair: pair[1])).values()
	# Out[2]: odict_values([1, 2, 3])

Keiku / extract_onehot_vector.py

Created April 12, 2017 06:30

Extract the one-hot encoding vector.

	import numpy as np  # needed for np.array below; the snippet did not import it
	from sklearn.preprocessing import LabelEncoder, OneHotEncoder

	# Two samples with three categorical (string) features each.
	X_str = np.array([['a', 'dog', 'red'], ['b', 'cat', 'green']])
	# transform to integer: encode all strings with one shared label set, then restore the 2-D shape
	X_int = LabelEncoder().fit_transform(X_str.ravel()).reshape(*X_str.shape)
	# transform to binary: one-hot encode each integer column and densify the sparse result
	X_bin = OneHotEncoder().fit_transform(X_int).toarray()

	print(X_bin)
	# [[ 1. 0. 0. 1. 0. 1.]

Keiku / extract_tfidf_vector.py

Last active April 11, 2017 07:40

Extract the tf-idf vector.

	from sklearn.feature_extraction.text import TfidfVectorizer

	# Small corpus: one document per list element.
	text = [
	    'This is a string',
	    'This is another string',
	    'TFIDF computation calculation',
	    'TfIDF is the product of TF and IDF',
	]

	# Drop English stop words; norm=None disables row normalisation of the tf-idf weights.
	vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)

	X = vectorizer.fit_transform(text)
	# get_feature_names() was removed in scikit-learn 1.2. Its replacement,
	# get_feature_names_out() (scikit-learn >= 1.0), returns an ndarray, so
	# convert it back to a plain list of str to keep the original result type.
	X_vovab = vectorizer.get_feature_names_out().tolist()
	# Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']
	X_mat = X.todense()
	# Out[2]: