Last active
June 1, 2020 19:27
-
-
Save MattJBritton/e8a7113fe4c0612ad02ee91c0e5e40eb to your computer and use it in GitHub Desktop.
Answer to Cassie Kozyrkov Pattern Challenge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
# Load data: 60 observed (x, y) pairs of the sequence, x = 1..60.
data = [
    (1, 28), (2, 17), (3, 92),
    (4, 41), (5, 9), (6, 87),
    (7, 54), (8, 3), (9, 78),
    (10, 67), (11, 1), (12, 67),
    (13, 78), (14, 3), (15, 55),
    (16, 86), (17, 8), (18, 42),
    (19, 92), (20, 17), (21, 29),
    (22, 94), (23, 28), (24, 18),
    (25, 93), (26, 40), (27, 9),
    (28, 87), (29, 53), (30, 3),
    (31, 79), (32, 66), (33, 1),
    (34, 68), (35, 77), (36, 3),
    (37, 56), (38, 86), (39, 8),
    (40, 43), (41, 92), (42, 16),
    (43, 30), (44, 94), (45, 27),
    (46, 19), (47, 93), (48, 39),
    (49, 10), (50, 88), (51, 53),
    (52, 4), (53, 80), (54, 65),
    (55, 1), (56, 69), (57, 77),
    (58, 3), (59, 57), (60, 86),
]
# One row per observation, with columns "x" (position) and "y" (value).
df = pd.DataFrame(data, columns=["x", "y"])
# Just plotting x against y shows a sinusoidal pattern.
# NOTE(review): `alt` is presumably Altair (`import altair as alt`); no such
# import is visible in this file -- confirm it is provided before running.
base = alt.Chart(df).encode(
    x="x",
    y="y",
    tooltip=["x", "y"],
).properties(width=900)
# Layer a line, the individual points, and a text label showing each y value.
# As a bare expression this only renders in a notebook-style environment.
base.mark_line() + base.mark_point() + base.mark_text(dy=-8, dx=8).encode(text="y")
# Tag each point with one of three interleaved series: (x - 1) mod 3.
df["series"] = (df["x"] - 1).mod(3)
# The pattern is made even clearer by colouring the points by series.
# NOTE(review): `alt` is presumably Altair (`import altair as alt`); no such
# import is visible in this file -- confirm it is provided before running.
base = alt.Chart(df).encode(
    x="x",
    y="y",
    tooltip=["x", "y"],
    color="series:N",
).properties(width=900)
# Same layered line + point + label chart as before, now coloured by series.
base.mark_line() + base.mark_point() + base.mark_text(dy=-8, dx=8).encode(text="y")
# To find the exact equation, create a column for sin(x)^2 and let a linear
# regression find the coefficients.
# (A bit of trial and error with sin(x), cos(x), etc. has been removed here.)
df["sin_squared"] = df["x"].apply(lambda value: math.sin(value) ** 2)
# scikit-learn expects a 2-D feature matrix of shape (n_samples, n_features),
# so select the column with a list to get a one-column DataFrame rather than
# a 1-D Series (which `fit` rejects).
x = df[["sin_squared"]]
y = df["y"]
linreg = LinearRegression()
linreg.fit(x, y)
# score is > 0.999
print(linreg.score(x, y))
print(linreg.coef_)
print(linreg.intercept_)
# This function returns the predicted y for any x.
def generate_sequence(x):
    """Return the fitted sequence value at position *x*.

    Evaluates ``coef * sin(x)**2 + intercept`` using the coefficient and
    intercept of the module-level ``linreg`` model, rounded to zero decimal
    places with ``np.round``.

    Args:
        x: A scalar position (radians are implied by ``math.sin``).

    Returns:
        The rounded prediction as a NumPy float.
    """
    return np.round(linreg.coef_[0] * math.sin(x) ** 2 + linreg.intercept_, 0)
# Predict every observed position with the fitted formula.
df["predicted"] = df["x"].apply(generate_sequence)
# View the dataframe to compare the "y" and "predicted" columns.
print(df)
# Get the value for x = 61 (printed so it is visible when run as a script).
print(generate_sequence(61))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you run the above code, you will get the function:
`def generate_sequence(x): return np.round(-92.966*math.sin(x)**2 + 94.227, 0)`
and the 61st value is 7 (`generate_sequence(61) == 7`).