-
-
Save dimi-tree/70fdb789c9391f3f58dc to your computer and use it in GitHub Desktop.
Data Science from Scratch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Visualizing Data"""
from collections import Counter

import matplotlib.pyplot as plt
## Bar charts
def make_chart_simple_bar_chart(plt):
    """Draw a bar chart of Academy Award counts for five movies.

    Args:
        plt: A pyplot-like object; ``bar``, ``ylabel``, ``title``, ``xticks``
            and ``show`` are called on it, in that order.
    """
    movies = ["Annie Hall", "Ben-Hur", "Casablanca", "Gandhi", "West Side Story"]
    num_oscars = [5, 11, 3, 8, 10]

    # One integer slot per movie. The bars and the tick labels share these
    # positions and the alignment is stated explicitly, so every label sits
    # under the middle of its bar regardless of the library's default
    # alignment (hand-tuned 0.1 / 0.5 offsets only work for edge alignment).
    positions = list(range(len(movies)))

    # bars centered on [positions], heights [num_oscars], default width
    plt.bar(positions, num_oscars, align="center")
    plt.ylabel("# of Academy Awards")
    plt.title("My Favorite Movies")

    # label x-axis with movie names at bar centers
    plt.xticks(positions, movies)
    plt.show()
def make_chart_histogram(plt):
    """Draw a histogram of exam grades bucketed by decile.

    Args:
        plt: A pyplot-like object; ``bar``, ``axis``, ``xticks``, ``xlabel``,
            ``ylabel``, ``title`` and ``show`` are called on it.
    """
    grades = [83, 95, 91, 87, 70, 0, 85, 82, 100, 67, 73, 77, 0]

    # Bucket every grade by its decile (83 -> 80, 100 -> 100) and count them.
    histogram = Counter(grade // 10 * 10 for grade in grades)

    # Materialise keys and counts as plain lists in matching order.
    deciles = list(histogram)
    counts = [histogram[decile] for decile in deciles]

    # Center each bar on its decile with an explicit alignment instead of
    # shifting the x-coordinates by hand, which is only correct when the
    # library treats x as the left edge of the bar.
    plt.bar(
        deciles,         # bar centers
        counts,          # give each bar its correct height
        8,               # give each bar a width of 8
        align="center",
    )

    plt.axis([-5, 105, 0, 5])                # x-axis from -5 to 105,
                                             # y-axis from 0 to 5
    plt.xticks([10 * i for i in range(11)])  # x-axis labels at 0, 10, ..., 100
    plt.xlabel("Decile")
    plt.ylabel("# of Students")
    plt.title("Distribution of Exam 1 Grades")
    plt.show()
# !! Note: be judicious when using plt.axis(). When creating bar charts it is
# good practice to start the y-axis at 0; otherwise the plot can be misleading.
## Scatterplots
def make_chart_scatterplot(plt):
    """Scatter test 2 grades against test 1 grades on equally scaled axes.

    When scattering comparable variables, you might get a misleading picture
    if you let matplotlib choose the scale, so ``plt.axis("equal")`` is
    requested before the figure is shown.

    Args:
        plt: A pyplot-like object; ``scatter``, ``xlabel``, ``ylabel``,
            ``title``, ``axis`` and ``show`` are called on it, in that order.
    """
    test_1_grades = [99, 90, 85, 97, 80]
    test_2_grades = [100, 85, 60, 90, 70]

    plt.scatter(test_1_grades, test_2_grades)
    plt.xlabel("test 1 grade")
    plt.ylabel("test 2 grade")
    plt.title("Axes Are Comparable")
    plt.axis("equal")  # scattering comparable variables
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment