- Create a Fast.ai machine from public templates w/ P4000 and public IP
- $ source deactivate fastai
- $ pip install virtualenv
# --- Handling missing values ---
# Impute NaNs in "height" with a hand-picked constant.
avg_height = 67  # Maybe this is a good number
data["height"] = data["height"].fillna(avg_height)

# Impute NaNs with a statistic computed from the data itself.
avg_height = data["height"].median()  # This is probably more accurate
data["height"] = data["height"].fillna(avg_height)

# Dropping rows with missing values:
# check which rows of "height" aren't null (that code is not shown in this excerpt).

# --- Formatting data ---
data['state'] = data['state'].str.upper()  # Capitalize the whole thing
data['state'] = data['state'].replace(  # Normalise the spelling of the state name
    to_replace=["CA", "C.A", "CALI"],
    value=["CALIFORNIA", "CALIFORNIA", "CALIFORNIA"])

# Dates and times are quite common in large datasets.
# Converting all strings to datetime objects is good standardisation practice.
# Here, the data["time"] strings look like "2019-01-15", which is exactly
# how the "format" argument is set (the conversion code is not shown in this excerpt).
| import numpy as np | |
| import multiprocessing as multi | |
def chunks(n, page_list):
    """Partition *page_list* into n roughly equal sub-arrays.

    Thin wrapper over ``np.array_split``, which tolerates lengths that
    are not evenly divisible by ``n``.
    """
    return np.array_split(page_list, n)
# Size the worker pool to the machine's CPU count.
cpus = multi.cpu_count()
workers = []  # process handles will be collected here
| page_list = ['www.website.com/page1.html', 'www.website.com/page2.html' |
| """ | |
| Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) | |
| BSD License | |
| """ | |
| import numpy as np | |
# data I/O
# Load the training corpus; should be a simple plain-text file.
# Fix: use a context manager so the file handle is always closed
# (the original `open(...).read()` leaked the handle).
with open('input.txt', 'r') as f:
    data = f.read()
chars = list(set(data))  # unique characters form the vocabulary (order is arbitrary)
data_size, vocab_size = len(data), len(chars)