AO8 · December 3, 2019 19:54
diff --git a/lead_analyzer2.py b/lead_analyzer2.py
 import csv
 from collections import Counter
 import matplotlib.pyplot as plt

 # Tally, per unique invitee email, how each student reported hearing about
 # the program.
 leads = Counter()
 email_addresses = set()
 total_rows = 0  # every CSV row, duplicates included; kept for use outside the plot

 # Answers that are counted. 'Other' is deliberately absent: it has since been
 # removed from the appointment booking question, so old 'Other' rows are ignored.
 how_students_heard_set = {
     "word of mouth",
     "green river website",
     "greenriver.edu",
     "career event",
     "internet search",
     "IT instructor",
     "CS instructor",
     "social media",
     "email campaign",
     "partner college referral",
     "advising day",
 }

 # newline="" is what the csv module asks for when it is handed a file object,
 # so that line breaks inside quoted fields are parsed correctly.
 with open("events-export.csv", newline="") as f:
     csv_reader = csv.DictReader(f)
     for row in csv_reader:
         total_rows += 1
         email = row["Invitee Email"]
         # Only the first row seen for an email address is considered.
         if email in email_addresses:
             continue
         email_addresses.add(email)
         answer = row["Response 1"]
         if answer in how_students_heard_set:
             leads[answer] += 1

 # The Calendly answer changed from 'website' to '.edu'; fold both spellings
 # into the single "green river website" bucket. A Counter returns 0 for a
 # missing key and tolerates deleting one, so this is safe with no matches.
 leads["green river website"] += leads["greenriver.edu"]
 del leads["greenriver.edu"]

 # Order the sources from most to least common.
 sorted_leads = dict(leads.most_common())

 # Print each source with its count and its share of all counted responses.
 total = sum(sorted_leads.values())
 for k, v in sorted_leads.items():
     # total is 0 when no row matched a known answer; report 0.0% in that case
     # instead of raising ZeroDivisionError.
     share = round(v / total * 100, 1) if total else 0.0
     print(f"{k} = {v} ({share}%)")

 # Parallel lists for plotting: bar labels and bar heights, in sorted order.
 sources = list(sorted_leads)
 values = list(sorted_leads.values())

 # Apply the seaborn look under whichever name this matplotlib provides:
 # "seaborn" was renamed to "seaborn-v0_8" in matplotlib 3.6 and the old name
 # was later removed. If neither exists, the default style is kept.
 for style_name in ("seaborn-v0_8", "seaborn"):
     if style_name in plt.style.available:
         plt.style.use(style_name)
         break
 fig, ax = plt.subplots()
 ind = range(len(values))
 plot = ax.bar(ind, values, color="#28a745")

 # Write each bar's value just above its top edge, centred on the bar.
 for rect in plot:
     height = rect.get_height()
     ax.text(rect.get_x() + rect.get_width() / 2, 1.002 * height,
             f"{int(height)}", ha="center", va="bottom")

 # One tick per bar, labelled with its source.
 ax.set_xticks(ind)
 ax.set_xticklabels(sources)
 # Keep the original 0-180 range, but grow it when a bar would otherwise be
 # cut off at the top of the axes.
 ax.set_ylim(0, max(180, 1.05 * max(values, default=0)))

 # Axis label, title, and the three credit lines (positioned in data coordinates).
 ax.set_ylabel("Unique Responses")
 plt.title("How students report hearing about our BAS-SD: July 2017 - PRESENT",
           size="large", weight="bold")
 plt.annotate("Data collected at booking via Calendly", xy=(8.105, 175.000))
 plt.annotate(f"{total} unique student data points", xy=(8.450, 170.500))
 plt.annotate("Plot by Andy O", xy=(9.150, 166.000))

 # Display the figure.
 plt.show()
	import csv
	from collections import Counter
	import matplotlib.pyplot as plt

	# Tally, per unique invitee email, how each student reported hearing about
	# the program.
	leads = Counter()
	email_addresses = set()
	total_rows = 0  # every CSV row, duplicates included; kept for use outside the plot

	# Answers that are counted. 'Other' is deliberately absent: it has since been
	# removed from the appointment booking question, so old 'Other' rows are ignored.
	how_students_heard_set = {
	    "word of mouth",
	    "green river website",
	    "greenriver.edu",
	    "career event",
	    "internet search",
	    "IT instructor",
	    "CS instructor",
	    "social media",
	    "email campaign",
	    "partner college referral",
	    "advising day",
	}

	# newline="" is what the csv module asks for when it is handed a file object,
	# so that line breaks inside quoted fields are parsed correctly.
	with open("events-export.csv", newline="") as f:
	    csv_reader = csv.DictReader(f)
	    for row in csv_reader:
	        total_rows += 1
	        email = row["Invitee Email"]
	        # Only the first row seen for an email address is considered.
	        if email in email_addresses:
	            continue
	        email_addresses.add(email)
	        answer = row["Response 1"]
	        if answer in how_students_heard_set:
	            leads[answer] += 1

	# The Calendly answer changed from 'website' to '.edu'; fold both spellings
	# into the single "green river website" bucket. A Counter returns 0 for a
	# missing key and tolerates deleting one, so this is safe with no matches.
	leads["green river website"] += leads["greenriver.edu"]
	del leads["greenriver.edu"]

	# Order the sources from most to least common.
	sorted_leads = dict(leads.most_common())

	# Print each source with its count and its share of all counted responses.
	total = sum(sorted_leads.values())
	for k, v in sorted_leads.items():
	    # total is 0 when no row matched a known answer; report 0.0% in that case
	    # instead of raising ZeroDivisionError.
	    share = round(v / total * 100, 1) if total else 0.0
	    print(f"{k} = {v} ({share}%)")

	# Parallel lists for plotting: bar labels and bar heights, in sorted order.
	sources = list(sorted_leads)
	values = list(sorted_leads.values())

	# Apply the seaborn look under whichever name this matplotlib provides:
	# "seaborn" was renamed to "seaborn-v0_8" in matplotlib 3.6 and the old name
	# was later removed. If neither exists, the default style is kept.
	for style_name in ("seaborn-v0_8", "seaborn"):
	    if style_name in plt.style.available:
	        plt.style.use(style_name)
	        break
	fig, ax = plt.subplots()
	ind = range(len(values))
	plot = ax.bar(ind, values, color="#28a745")

	# Write each bar's value just above its top edge, centred on the bar.
	for rect in plot:
	    height = rect.get_height()
	    ax.text(rect.get_x() + rect.get_width() / 2, 1.002 * height,
	            f"{int(height)}", ha="center", va="bottom")

	# One tick per bar, labelled with its source.
	ax.set_xticks(ind)
	ax.set_xticklabels(sources)
	# Keep the original 0-180 range, but grow it when a bar would otherwise be
	# cut off at the top of the axes.
	ax.set_ylim(0, max(180, 1.05 * max(values, default=0)))

	# Axis label, title, and the three credit lines (positioned in data coordinates).
	ax.set_ylabel("Unique Responses")
	plt.title("How students report hearing about our BAS-SD: July 2017 - PRESENT",
	          size="large", weight="bold")
	plt.annotate("Data collected at booking via Calendly", xy=(8.105, 175.000))
	plt.annotate(f"{total} unique student data points", xy=(8.450, 170.500))
	plt.annotate("Plot by Andy O", xy=(9.150, 166.000))

	# Display the figure.
	plt.show()
No results found