Last active
December 3, 2019 19:54
-
-
Save AO8/71ac2e65f328db31a00008fddad99167 to your computer and use it in GitHub Desktop.
Reads a CSV exported from Calendly and creates a bar chart with matplotlib depicting how students heard about the program when booking an appointment. This version is more concise, using the Counter class as the central data structure.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| from collections import Counter | |
| import matplotlib.pyplot as plt | |
# Survey answers that are counted as a lead source. 'Other' entries are
# ignored; 'Other' has since been removed from the appointment booking question.
how_students_heard_set = {
    "word of mouth", "green river website",
    "greenriver.edu", "career event",
    "internet search", "IT instructor",
    "CS instructor", "social media",
    "email campaign", "partner college referral",
    "advising day",
}


def tally_leads(rows):
    """Count how each unique student reported hearing about the program.

    Args:
        rows: Iterable of mappings that each provide the "Invitee Email" and
            "Response 1" keys, e.g. a ``csv.DictReader`` over the export.

    Returns:
        A ``(leads, total_rows)`` tuple. ``leads`` is a ``Counter`` mapping a
        recognised answer to the number of unique emails that gave it;
        ``total_rows`` is the number of rows consumed.

    Raises:
        KeyError: If a row lacks either of the two expected keys.
    """
    leads = Counter()
    email_addresses = set()
    total_rows = 0

    for row in rows:
        total_rows += 1
        email = row["Invitee Email"]
        # Only the first row seen for an email counts, even when that first
        # answer is not a recognised source.
        if email in email_addresses:
            continue
        email_addresses.add(email)
        answer = row["Response 1"]
        if answer in how_students_heard_set:
            leads[answer] += 1

    # Tidy up after the Calendly question changed from 'website' to '.edu':
    # fold both spellings into one bucket. The bucket is always created, so
    # it appears (possibly as 0) even when neither answer occurred.
    leads["green river website"] += leads.pop("greenriver.edu", 0)
    return leads, total_rows


def format_summary(sorted_leads, total):
    """Return one "source = count (share%)" line per entry of *sorted_leads*.

    Args:
        sorted_leads: Mapping of source name to count, in display order.
        total: Sum of all counts; used as the percentage denominator.

    Returns:
        List of formatted strings, in the mapping's iteration order.
    """
    lines = []
    for source, count in sorted_leads.items():
        # Guard the division: total is 0 when no row had a recognised answer.
        share = round((count / total * 100), 1) if total else 0.0
        lines.append(f"{source} = {count} ({share}%)")
    return lines


def plot_leads(sorted_leads, total):
    """Draw the bar chart of lead sources on a new figure.

    Args:
        sorted_leads: Mapping of source name to count, in bar order.
        total: Number of unique student data points, shown in an annotation.

    Returns:
        The ``(fig, ax)`` pair created for the chart.
    """
    sources = list(sorted_leads)
    values = list(sorted_leads.values())

    # The bundled "seaborn" style was renamed "seaborn-v0_8" in Matplotlib 3.6
    # and the old name was later removed; use whichever this install provides.
    style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
    plt.style.use(style)

    fig, ax = plt.subplots()
    ind = range(len(values))
    bars = ax.bar(ind, values, color="#28a745")

    # display data value on head of each bar
    for rect in bars:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2, 1.002 * height,
                f"{int(height)}", ha="center", va="bottom")

    # configure ticks and lims
    ax.set_xticks(ind)
    ax.set_xticklabels(sources)
    # Keep the original ceiling of 180, but grow it rather than clip a bar.
    ax.set_ylim(0, max(180, 1.1 * max(values, default=0)))

    # configure labels, title, and annotations
    ax.set_ylabel("Unique Responses")
    ax.set_title("How students report hearing about our BAS-SD: July 2017 - PRESENT",
                 size="large", weight="bold")
    # Anchor the notes to the axes' top-right corner (axes-fraction
    # coordinates) so they stay put whatever the bar count or y-range is.
    corner = {"xycoords": "axes fraction", "ha": "right"}
    ax.annotate("Data collected at booking via Calendly", xy=(0.99, 0.972), **corner)
    ax.annotate(f"{total} unique student data points", xy=(0.99, 0.947), **corner)
    ax.annotate("Plot by Andy O", xy=(0.99, 0.922), **corner)
    return fig, ax


def main():
    """Read events-export.csv, print the per-source summary, and show the plot."""
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open("events-export.csv", newline="") as f:
        # The second item is the total row count, unused by the plot itself.
        leads, _ = tally_leads(csv.DictReader(f))

    # create sorted dict from Counter, by value, descending
    sorted_leads = dict(leads.most_common())
    total = sum(sorted_leads.values())

    for line in format_summary(sorted_leads, total):
        print(line)

    plot_leads(sorted_leads, total)
    # plot!
    plt.show()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment