Luciano StrikingLoo
from PIL import Image
import numpy as np
import sys

image = Image.open(sys.argv[1])
image_data = np.asarray(image)

for i in range(image_data.shape[0]):
    for j in range(image_data.shape[1]):
        oldpixel = image_data[i][j]
StrikingLoo / bootstrapping.py (last active July 14, 2022)
A simple implementation of a confidence interval using bootstrapping
import random
import numpy as np

population = np.random.uniform(size=5000)

def confidence_interval(population, confidence=95, aggregation=np.mean, samples=500, sample_n=500):
    # if the population is small, use half of it per bootstrap sample
    if population.shape[0] < sample_n:
        sample_n = population.shape[0] // 2
    aggs = []
    for _ in range(samples):
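        # The preview cuts off here. A plausible completion, assuming a
        # percentile bootstrap (a sketch, not necessarily the original gist):
        sample = np.random.choice(population, size=sample_n, replace=True)
        aggs.append(aggregation(sample))
    lower_bound = np.percentile(aggs, (100 - confidence) / 2)
    upper_bound = np.percentile(aggs, 100 - (100 - confidence) / 2)
    return lower_bound, upper_bound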
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge, LinearRegression, ElasticNet
from sklearn.model_selection import cross_val_score

def train_eval_model(train_housing_prepared, train_labels, strategy, alpha=None, l1_ratio=None):
    if alpha is not None:
        print('alpha: ', alpha)
    if l1_ratio is not None:
        print('l1_ratio: ', l1_ratio)
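    # The preview cuts off here. A plausible continuation (a sketch, not
    # necessarily the original gist): pick the regressor named by `strategy`,
    # fit it, and score it with cross-validation. The strategy names below
    # are assumed.
    if strategy == 'lasso':
        model = Lasso(alpha=alpha)
    elif strategy == 'ridge':
        model = Ridge(alpha=alpha)
    elif strategy == 'elasticnet':
        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    else:
        model = LinearRegression()
    model.fit(train_housing_prepared, train_labels)
    predictions = model.predict(train_housing_prepared)
    print('train RMSE: ', mean_squared_error(train_labels, predictions) ** 0.5)
    scores = cross_val_score(model, train_housing_prepared, train_labels,
                             scoring='neg_mean_squared_error', cv=10)
    print('mean CV RMSE: ', (-scores.mean()) ** 0.5)
    return model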
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin
housing_num = housing.drop("ocean_proximity", axis=1)
num_attribs = list(housing_num)
print(num_attribs)
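The preview stops before the transformers are assembled. Based on the imports above, a typical way this kind of preprocessing gets finished is sketched below (an assumption, not necessarily the original gist; the BaseEstimator / TransformerMixin imports suggest a custom transformer that is not shown here):

cat_attribs = ["ocean_proximity"]

num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="median")),  # fill missing numeric values
    ('std_scaler', StandardScaler()),               # standardize numeric columns
])

full_pipeline = ColumnTransformer([
    ('num', num_pipeline, num_attribs),             # numeric columns
    ('cat', OneHotEncoder(), cat_attribs),          # one-hot encode the category
])

housing_prepared = full_pipeline.fit_transform(housing)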
# This code is not mine. Source: https://github.com/keras-team/keras/blob/master/examples/conv_filter_visualization.py
def _generate_filter_image(input_img,
                           layer_output,
                           filter_index):
    """Generates image for one particular filter.

    # Arguments
        input_img: The input-image Tensor.
        layer_output: The output-image Tensor.
        filter_index: The to be processed filter number.
            Assumed to be valid.
    """
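The linked example builds each filter image by gradient ascent on the input: start from a noisy gray image and repeatedly nudge it to maximize the mean activation of one filter. A rough sketch of that idea using tf.GradientTape instead of the original K.gradients API (an approximation of the technique, not the linked implementation):

import numpy as np
import tensorflow as tf

def visualize_filter(model, layer_name, filter_index, size=128, steps=30, step_size=10.0):
    layer = model.get_layer(layer_name)
    feature_extractor = tf.keras.Model(inputs=model.inputs, outputs=layer.output)
    # start from a slightly noisy gray image
    img = tf.Variable(tf.random.uniform((1, size, size, 3)) * 0.25 + 0.45)
    for _ in range(steps):
        with tf.GradientTape() as tape:
            activation = feature_extractor(img)
            # maximize the mean activation of the chosen filter
            loss = tf.reduce_mean(activation[:, :, :, filter_index])
        grads = tape.gradient(loss, img)
        grads = tf.math.l2_normalize(grads)
        img.assign_add(step_size * grads)
    return np.clip(img.numpy()[0], 0, 1)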
from textgenrnn import textgenrnn
import glob

textgen = textgenrnn()

# textgenrnn.train_from_file requires the whole dataset to be in a single file
corpus = ''
for file_name in glob.glob('lovecraft_corpus/*'):
    with open(file_name) as f:
        corpus += f.read()
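Since train_from_file wants a single file, the concatenated corpus presumably gets written back to disk before training. A sketch of how that usually looks (the file name and epoch count are assumptions, not necessarily the original gist):

# write the concatenated corpus to one file, as textgenrnn expects
with open('lovecraft_corpus.txt', 'w') as f:
    f.write(corpus)

textgen.train_from_file('lovecraft_corpus.txt', num_epochs=10)
print(textgen.generate())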
def sample_next_word_after_sequence(word_sequence, alpha=0):
    # row of transition counts for this k-word sequence, smoothed by alpha
    next_word_vector = next_after_k_words_matrix[k_words_idx_dict[word_sequence]] + alpha
    likelihoods = next_word_vector / next_word_vector.sum()
    return weighted_choice(distinct_words, likelihoods.toarray())
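weighted_choice is not shown in this preview; a minimal sketch of one way to implement it, assuming it is just a probability-weighted draw over the vocabulary:

import numpy as np

def weighted_choice(objects, weights):
    # draw one element of `objects` with probability proportional to `weights`
    weights = np.asarray(weights).flatten()
    return np.random.choice(objects, p=weights / weights.sum())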
def stochastic_chain(seed, chain_length=15, seed_length=2):
    current_words = seed.split(' ')
    if len(current_words) != seed_length:
        raise ValueError(f'wrong number of words, expected {seed_length}')
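    # The preview cuts off here. A plausible continuation, assuming a sliding
    # window over the last `seed_length` words (a sketch, not necessarily the
    # original gist):
    sentence = seed
    for _ in range(chain_length):
        sentence += ' '
        next_word = sample_next_word_after_sequence(' '.join(current_words))
        sentence += next_word
        current_words = current_words[1:] + [next_word]
    return sentence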
k = 2  # adjustable: length of the word window used as the Markov state
sets_of_k_words = [' '.join(corpus_words[i:i+k]) for i, _ in enumerate(corpus_words[:-k])]
from scipy.sparse import dok_matrix
sets_count = len(list(set(sets_of_k_words)))
next_after_k_words_matrix = dok_matrix((sets_count, len(distinct_words)))
distinct_sets_of_k_words = list(set(sets_of_k_words))
k_words_idx_dict = {word: i for i, word in enumerate(distinct_sets_of_k_words)}
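The preview stops before the transition counts are filled in. Presumably a single pass over the corpus increments one cell per k-word window, as sketched here (an assumption; k, corpus_words and word_idx_dict are defined in the neighboring snippets):

for i, word_set in enumerate(sets_of_k_words):
    # the word that follows this k-word window in the corpus
    next_word = corpus_words[i + k]
    next_after_k_words_matrix[k_words_idx_dict[word_set], word_idx_dict[next_word]] += 1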
corpus_words = corpus.split(' ')
corpus_words = [word for word in corpus_words if word != '']
corpus_words # [...'a', 'wyvern', ',', 'two', 'of', 'the', 'thousand'...]
len(corpus_words) # 2185920
distinct_words = list(set(corpus_words))
word_idx_dict = {word: i for i, word in enumerate(distinct_words)}
distinct_words_count = len(list(set(corpus_words)))
distinct_words_count # 32663
corpus = ""
# file_names: list of paths to the corpus text files (assumed to be defined earlier in the gist)
for file_name in file_names:
    with open(file_name, 'r') as f:
        corpus += f.read()

corpus = corpus.replace('\n', ' ')
corpus = corpus.replace('\t', ' ')
corpus = corpus.replace('“', ' " ')
corpus = corpus.replace('”', ' " ')

# pad punctuation with spaces so each mark becomes its own token
for spaced in ['.', '-', ',', '!', '?', '(', '—', ')']:
    corpus = corpus.replace(spaced, ' {0} '.format(spaced))