This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
def xy(a, b, c, d, n):
    """Map four values to a point on the ray at angle ``n * pi / 3``.

    The radius is the mean of the four values after min-max normalisation,
    i.e. ``(mean - min) / (max - min)``, so it lies in ``[0, 1]``.

    Args:
        a, b, c, d: The four numeric values to summarise.
        n: Multiple of ``pi / 3`` (60 degrees) selecting the ray's angle.

    Returns:
        Tuple ``(x, y)`` with the Cartesian coordinates of the point.
    """
    values = [a, b, c, d]
    lowest = np.min(values)
    spread = np.max(values) - lowest
    if spread == 0:
        # All four values are equal, so the normalised mean would be 0 / 0.
        # Place the point at the origin instead of returning NaN.
        return 0.0, 0.0
    radius = (np.average(values) - lowest) / spread
    angle = (np.pi / 3) * n
    return radius * np.cos(angle), radius * np.sin(angle)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
# [1] import and visualise our data | |
dataset = pd.read_csv('adult.csv') # import data | |
# from Kaggle's 'Adult Census Income' dataset | |
# let's visualise the data; we classify by income (>$50k or not) | |
# based on number of years in education and hours worked per week | |
plt.figure(figsize=(12, 8)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# [2] now split into train/test set | |
# create our mask (70%) | |
mask = np.random.rand(len(dataset)) < 0.7 | |
train = dataset[mask] # get 70% of samples from mask indices | |
test = dataset[~mask] # get other 30% of samples | |
# we also need to split the data based on whether person earns | |
# more than or less than 50K | |
less = train[train['income'] == '<=50K'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def mean(x):
    """Return the arithmetic mean of the values in ``x``.

    Hand-rolled for illustration; in practice use ``np.mean(x)``.
    """
    total = sum(x)
    count = len(x)
    return total / count
# Compute one mean per group with the mean() helper.
# NOTE(review): `less` and `more` are defined outside this snippet; presumably
# they are the per-income-class training subsets. Confirm against the caller.
less_mean = mean(less)
more_mean = mean(more) # calculate mean for both models
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def std(x, mu):
    """Return the population standard deviation of ``x`` about ``mu``.

    Hand-rolled for illustration; in practice use ``np.std(x)``.

    Args:
        x: Sized collection of numeric samples (list, array, Series, or a
            2-D array/DataFrame, in which case the result is per column).
        mu: The mean to measure deviations from.

    Returns:
        ``sqrt(sum((x - mu) ** 2) / len(x))``, reduced along the first axis.
    """
    squared_dev = np.power(np.subtract(x, mu), 2)
    # Reduce along axis 0 with NumPy rather than the built-in sum(): this
    # gives the same result for lists and arrays, runs vectorised, and also
    # works for DataFrames, where built-in iteration yields column labels
    # instead of rows.
    return np.sqrt(np.sum(squared_dev, axis=0) / len(x))
# Use the local std() helper: np.std takes `axis` as its second positional
# argument, so passing the mean there is wrong. The means are the
# less_mean / more_mean values computed earlier.
std_less = std(less, less_mean)
std_more = std(more, more_mean)  # calculate standard deviation for both models
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def choose(): # here we setup our fruit picker script | |
if np.random.randint(0, 10) < 4: | |
# we have chosen bag A (40% probability) | |
if np.random.randint(0, 10) < 4: | |
# we have chosen an apple from bag A | |
return ('A', 'Apple') | |
else: | |
# we have chosen an orange from bag A |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyodbc | |
from datetime import datetime | |
class Sql: | |
"""Class used for establishing a Python to Microsoft SQL Server connection | |
and import/export/manipulation of data files inside the server. | |
""" | |
def __init__(self, database, server="XXVIR00012,55000"): | |
"""Here we are initialising our database and server parameters and | |
our connection to SQL server. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def push_dataframe(self, data, table="raw_data", batchsize=500, | |
overwrite=False): | |
"""Function used to upload a Pandas DataFrame (data) to SQL Server. | |
Keyword arguments: | |
data -- the dataframe to be uploaded | |
table -- the name of the new table in SQL (default "raw_data") | |
batchsize -- the number of rows to upload to the new table within each | |
execution, recommend no more than 1000 (default 500) | |
overwrite -- safety measure used to ensure user does not accidentally |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def push_dataframe(self, data, table="raw_data", batchsize=500): | |
# create execution cursor | |
cursor = self.cnxn.cursor() | |
# activate fast execute | |
cursor.fast_executemany = True | |
# create create table statement | |
query = "CREATE TABLE [" + table + "] (\n" | |
# iterate through each column to be included in create table statement |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyodbc | |
from datetime import datetime | |
class Sql: | |
def __init__(self, database, server="XXVIR00012,55000"): | |
# here we are telling python what to connect to (our SQL Server) | |
self.cnxn = pyodbc.connect("Driver={SQL Server Native Client 11.0};" | |
"Server="+server+";" | |
"Database="+database+";" |
OlderNewer