- Create a Fast.ai machine from public templates w/ P4000 and public IP
- $ source deactivate fastai
- $ pip install virtualenv
""" | |
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) | |
BSD License | |
""" | |
import numpy as np | |
# data I/O | |
data = open('input.txt', 'r').read() # should be simple plain text file | |
chars = list(set(data)) | |
data_size, vocab_size = len(data), len(chars) |
import numpy as np
import multiprocessing as multi


def chunks(n, page_list):
    """Split *page_list* into *n* roughly equal chunks.

    Thin wrapper around ``np.array_split``; when ``len(page_list)`` is not
    divisible by ``n`` the leading chunks are one element longer.
    """
    return np.array_split(page_list, n)


cpus = multi.cpu_count()  # one worker per available CPU core
workers = []
# NOTE(review): the original list literal was truncated mid-expression in the
# source; closed here with the two entries that were visible.
page_list = ['www.website.com/page1.html', 'www.website.com/page2.html']
# Formatting data
# Normalise the free-text "state" column so every spelling of California
# collapses to a single canonical value.
data['state'] = data['state'].str.upper()  # Capitalize the whole thing
data['state'] = data['state'].replace(     # Changing the format of the string
    to_replace=["CA", "C.A", "CALI"],
    value=["CALIFORNIA", "CALIFORNIA", "CALIFORNIA"])
# Dates and times are quite common in large datasets
# Converting all strings to datetime objects is good standardisation practice
# Here, the data["time"] strings will look like "2019-01-15", which is exactly
# how we set the "format" variable below
# Filling in NaN values of a particular feature variable
avg_height = 67  # Maybe this is a good number
data["height"] = data["height"].fillna(avg_height)

# Filling in NaN values with a calculated one
# NOTE(review): after the first fillna above there are no NaNs left, so this
# second fill is illustrative only — in practice pick one strategy.
avg_height = data["height"].median()  # This is probably more accurate
data["height"] = data["height"].fillna(avg_height)

# Dropping rows with missing values
# Here we check which rows of "height" aren't null
# Computing correlation coefficients
# Correlate every feature column against the "output" target column.
x_cols = [col for col in data.columns if col not in ['output']]
for col in x_cols:
    # NOTE(review): corr_coeffs is overwritten on every iteration; only the
    # last column's 2x2 correlation matrix survives the loop.
    corr_coeffs = np.corrcoef(data[col].values, data.output.values)

# Get the number of missing values in each column / feature variable
data.isnull().sum()

# Drop a feature variable
Solução para contornar o problema de tunelamento via ssh ao acessar o Jupyter notebook. A ideia é usar o ngrok para acessar o Jupyter sem a necessidade de tunelamento via ssh.
https://console.cloud.google.com/compute/instances
Inicie a instância e abra o shell:
Solução para contornar o problema de tunelamento usando ssh para acessar o Jupyter notebook que é bloqueado pelo firewall da rede wifi do ISC, onde ocorrem os encontros presenciais do grupo de estudo em Deep Learning de Brasília.
A ideia é tornar o servidor jupyter executando no Google Cloud Platform (GCP) acessível para rede externa, diretamente por seu IP, sem a necessidade de tunelamento via ssh.
https://console.cloud.google.com/networking/addresses/
Logado na console GCP, acessar o endereço acima e reservar um endereço IP estático: "Reserve Static Address".