This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PIL import Image | |
import numpy as np | |
import sys | |
image = Image.open(sys.argv[1]) | |
image_data = np.asarray(image) | |
for i in range(image_data.shape[0]): | |
for j in range(image_data.shape[1]): | |
oldpixel = image_data[i][j] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import numpy as np | |
population = np.random.uniform(size = 5000) | |
def confidence_interval(population, confidence = 95, aggregation = np.mean, samples = 500, sample_n = 500): | |
if population.shape[0] < sample_n: | |
sample_n = population.shape[0]//2 | |
aggs = [] | |
for _ in range(samples): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import mean_squared_error | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import Lasso, Ridge, LinearRegression,ElasticNet | |
from sklearn.model_selection import cross_val_score | |
def train_eval_model(train_housing_prepared, train_labels, strategy, alpha = None, l1_ratio = None): | |
if alpha != None: | |
print('alpha: ', alpha) | |
if l1_ratio != None: | |
print('l1_ratio: ', l1_ratio) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.compose import ColumnTransformer | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
from sklearn.impute import SimpleImputer | |
from sklearn.base import BaseEstimator, TransformerMixin | |
# Keep everything except the "ocean_proximity" column (axis=1 drops along columns).
housing_num = housing.drop("ocean_proximity", axis=1)

# Iterating housing_num yields its attribute names
# (presumably DataFrame column labels -- confirm against where `housing` is loaded).
num_attribs = [*housing_num]
print(num_attribs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code is not mine. Source: https://github.com/keras-team/keras/blob/master/examples/conv_filter_visualization.py | |
def _generate_filter_image(input_img, | |
layer_output, | |
filter_index): | |
"""Generates image for one particular filter. | |
# Arguments | |
input_img: The input-image Tensor. | |
layer_output: The output-image Tensor. | |
filter_index: The to be processed filter number. | |
Assumed to be valid. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from textgenrnn import textgenrnn | |
import glob | |
# Instantiate the textgenrnn model object.
textgen = textgenrnn()

# textGenRNN.train_from_file requires the whole dataset to be in a single file,
# so first gather the text of every file under lovecraft_corpus/.
# The pieces are collected in a list and joined once: repeated `+=` on a
# growing string may re-copy the accumulated text on every iteration.
corpus_parts = []
for file_name in glob.glob('lovecraft_corpus/*'):
    # NOTE(review): no explicit encoding, so the platform default is used -- confirm.
    with open(file_name) as f:
        corpus_parts.append(f.read())
corpus = ''.join(corpus_parts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sample_next_word_after_sequence(word_sequence, alpha = 0):
    """Draw a follow-up word for ``word_sequence``.

    The row of ``next_after_k_words_matrix`` that belongs to the sequence is
    looked up through ``k_words_idx_dict``, ``alpha`` is added to it, and the
    result is divided by its own sum. The normalized row (converted with
    ``toarray()``) is passed to ``weighted_choice`` together with
    ``distinct_words``; whatever ``weighted_choice`` returns is returned.

    Args:
        word_sequence: key into ``k_words_idx_dict``.
        alpha: value added to the row before normalizing (default 0).

    Raises:
        KeyError: if ``word_sequence`` is not a key of ``k_words_idx_dict``.
    """
    row_index = k_words_idx_dict[word_sequence]
    weights = next_after_k_words_matrix[row_index] + alpha
    total = weights.sum()
    probabilities = weights / total
    return weighted_choice(distinct_words, probabilities.toarray())
def stochastic_chain(seed, chain_length=15, seed_length=2): | |
current_words = seed.split(' ') | |
if len(current_words) != seed_length: | |
raise ValueError(f'wrong number of words, expected {seed_length}') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.sparse import dok_matrix

k = 2  # adjustable

# Every window of k consecutive words, joined with single spaces. Start
# indices stop before len(corpus_words) - k, so the very last window (the one
# with no word after it) is left out, exactly as slicing with [:-k] did.
# Using range() avoids copying the word list just to enumerate its indices.
sets_of_k_words = [
    ' '.join(corpus_words[i:i + k]) for i in range(len(corpus_words) - k)
]

# Deduplicate once and derive both the count and the index mapping from the
# same list, instead of building the set a second time.
distinct_sets_of_k_words = list(set(sets_of_k_words))
sets_count = len(distinct_sets_of_k_words)
k_words_idx_dict = {word: i for i, word in enumerate(distinct_sets_of_k_words)}

# Sparse matrix with one row per distinct k-word sequence and one column per
# distinct word; it is created empty here.
next_after_k_words_matrix = dok_matrix((sets_count, len(distinct_words)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split on single spaces and drop the empty strings that consecutive spaces
# leave behind.
corpus_words = [word for word in corpus.split(' ') if word != '']
corpus_words  # [...'a', 'wyvern', ',', 'two', 'of', 'the', 'thousand'...]
len(corpus_words)  # 2185920

# Vocabulary of unique tokens and a token -> position lookup into it.
distinct_words = list(set(corpus_words))
word_idx_dict = {word: i for i, word in enumerate(distinct_words)}

# Reuse the vocabulary list rather than building a second set of the corpus.
distinct_words_count = len(distinct_words)
distinct_words_count  # 32663
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read every file in file_names and concatenate the texts into one string.
# The pieces are collected and joined once: repeated `+=` on a growing string
# may re-copy the accumulated text on every iteration.
texts = []
for file_name in file_names:
    # NOTE(review): no explicit encoding is passed, so the platform default
    # applies; the curly quotes and em dash handled below suggest the files
    # are UTF-8 -- confirm and pass encoding= explicitly if so.
    with open(file_name, 'r') as f:
        texts.append(f.read())
corpus = "".join(texts)

# Newlines and tabs become plain spaces.
corpus = corpus.replace('\n', ' ')
corpus = corpus.replace('\t', ' ')

# Typographic double quotes become a straight quote padded with spaces.
corpus = corpus.replace('“', ' " ')
corpus = corpus.replace('”', ' " ')

# Pad each punctuation mark with a space on both sides so it is separated
# from the neighbouring words.
for spaced in ['.', '-', ',', '!', '?', '(', '—', ')']:
    corpus = corpus.replace(spaced, ' {0} '.format(spaced))
Newer Older