Skip to content

Instantly share code, notes, and snippets.

@AO8
Last active December 3, 2019 19:54
Show Gist options
  • Select an option

  • Save AO8/71ac2e65f328db31a00008fddad99167 to your computer and use it in GitHub Desktop.

Select an option

Save AO8/71ac2e65f328db31a00008fddad99167 to your computer and use it in GitHub Desktop.
Reads a CSV exported from Calendly and creates a bar chart with matplotlib depicting how students heard about the program when booking an appointment. This version is more concise, using the Counter class as its central data structure.
import csv
from collections import Counter
import matplotlib.pyplot as plt
# Tally how unique students (keyed by email) say they heard about the program
# in a Calendly CSV export, print the breakdown, and show it as a bar chart.

# set up empty containers
leads = Counter()        # response text -> number of unique students
email_addresses = set()  # invitee emails already seen
total_rows = 0  # storing total rows in csv if useful outside of plot

# ignores 'Other' entries; 'Other' since removed from appt booking question
how_students_heard_set = {
    "word of mouth", "green river website",
    "greenriver.edu", "career event",
    "internet search", "IT instructor",
    "CS instructor", "social media",
    "email campaign", "partner college referral",
    "advising day",
}

# iterate over rows in CSV file
# newline="" is what the csv module asks for, so that quoted fields containing
# line breaks are parsed correctly.
# NOTE(review): the file is still decoded with the platform default encoding;
# confirm the export's encoding before pinning one here.
with open("events-export.csv", newline="") as f:
    csv_reader = csv.DictReader(f)
    for row in csv_reader:
        total_rows += 1
        # focus on unique email addresses / students only: just the first row
        # seen for each email is considered, whatever its response was
        email = row["Invitee Email"]
        if email in email_addresses:
            continue
        email_addresses.add(email)
        response = row["Response 1"]
        if response in how_students_heard_set:
            leads[response] += 1

# this bit of tidying up is needed due to Calendly question change from
# 'website' to '.edu'. Reading a missing Counter key yields 0 and deleting a
# missing key is a no-op, so this is safe when either label never appears.
leads["green river website"] += leads["greenriver.edu"]
del leads["greenriver.edu"]

# create sorted dict from Counter, by value, descending
sorted_leads = dict(leads.most_common())

# calculate totals for printing out
total = sum(sorted_leads.values())
for source, count in sorted_leads.items():
    # total is 0 when no row matched a known source; avoid dividing by zero
    share = round(count / total * 100, 1) if total else 0.0
    print(f"{source} = {count} ({share}%)")

# extract leads data into two separate lists
sources = list(sorted_leads)
values = list(sorted_leads.values())

# set up fig and axs
# The bundled seaborn style was renamed "seaborn-v0_8" in matplotlib 3.6 and
# the old name was later removed; use whichever this installation provides
# and fall back to the default style if neither exists.
for style_name in ("seaborn-v0_8", "seaborn"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
fig, ax = plt.subplots()
ind = range(len(values))
plot = ax.bar(ind, values, color="#28a745")

# display data value on head of each bar
for rect in plot:
    height = rect.get_height()
    ax.text(rect.get_x() + rect.get_width() / 2, 1.002 * height,
            f"{int(height)}", ha="center", va="bottom")

# configure ticks and lims
ax.set_xticks(ind)
ax.set_xticklabels(sources)
# keep the original 0-180 range, but grow it if a bar would not fit
ax.set_ylim(0, max(180, 1.1 * max(values, default=0)))

# configure labels, title, and annotations
ax.set_ylabel("Unique Responses")
plt.title("How students report hearing about our BAS-SD: July 2017 - PRESENT",
          size="large", weight="bold")
# Anchor the notes to the top-right corner in axes-fraction coordinates so
# they stay visible regardless of how many bars there are or how tall they
# get (data coordinates outside the axes are not drawn).
plt.annotate("Data collected at booking via Calendly", xy=(0.99, 0.972),
             xycoords="axes fraction", ha="right")
plt.annotate(f"{total} unique student data points", xy=(0.99, 0.947),
             xycoords="axes fraction", ha="right")
plt.annotate("Plot by Andy O", xy=(0.99, 0.922),
             xycoords="axes fraction", ha="right")

# plot!
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment