flying-sheep · August 29, 2015 14:14
diff --git a/generate_graphs.py b/generate_graphs.py
 import csv
 from collections import Counter
 from operator import itemgetter
 from matplotlib import rcParams
 import matplotlib.pyplot as plt

 # Switch on matplotlib's automatic figure layout for every chart built below.
 rcParams["figure.autolayout"] = True


 # 2013 survey tallies: one pair of counts per question id.
 # NOTE(review): the pairs look like (yes, no) counts, and presumably
 # (Python 2.x, Python 3.x) for "more_python2_or_python3" -- confirm.
 answers_2013 = {
     "written_python2": (4660, 119),
     "written_python3": (2862, 1908),
     "more_python2_or_python3": (3692, 1036),
     "python3_mistake": (1071, 3603),
     "dependencies_python3": (2883, 1808),
     "ported_2_to_3": (1554, 3179),
     "used_2to3": (768, 3953),
     "used_3to2": (91, 4627),
     "polyglot": (1473, 3261),
 }

 # 2014 question ids mapped to the full wording of each question.
 questions_2014 = {
     "written_python2": "Have you ever written code in Python 2.x?",
     "written_python3": "Have you ever written code in Python 3.x?",
     "more_python2_or_python3": "Do you currently write more code in Python 2.x or Python 3.x?",
     "personal_choice": "When starting a personal project, which Python version do you use?",
     "versions": "Which Python versions do you regularly use?",
     "python3_mistake": "Do you think Python 3.x was a mistake?",
     "ported_2_to_3": "Have you ever ported code from Python 2.x to Python 3.x?",
     "used_2to3": "Have you ever written/ported code using 2to3?",
     "used_3to2": "Have you ever written/ported code using 3to2?",
     "polyglot": "Have you ever written/ported code to run on Python 2.x and Python 3.x unmodified?",
     "blocking": "If applicable, what keeps you from leaving python 2 for python 3?",
 }
 # CSV file holding the raw 2014 answers.
 answers_2014_filename = "survey_answers.csv"


 # Question ids present in both years, and those that only exist in 2014.
 common_questions = set(answers_2013).intersection(questions_2014)
 new_questions = set(questions_2014).difference(answers_2013)
 # Question ids flagged as multiple-choice; both are new in 2014.
 multi_choices_questions = {"blocking", "versions"}


 def build_graphs():
     """Parse the 2014 answers and render one chart per survey question.

     Questions asked in both years get a 2013-vs-2014 comparison chart; the
     "versions", "personal_choice" and "blocking" questions get their own
     single-year charts.
     """
     survey = parse_answers()

     # Year-over-year comparison for every question present in both surveys.
     for name in common_questions:
         deal_with_question(name, answers_2013[name], survey[name])

     # Drop the first len("Python ") characters of each version label.
     skip = len("Python ")
     version_counts = {}
     for label, count in survey["versions"].items():
         version_counts[label[skip:]] = count
     deal_with_new_question("versions", version_counts)

     personal = survey["personal_choice"]
     deal_with_new_question("personal_choice", personal, relative=True)

     deal_with_blocking(survey["blocking"])


 def parse_answers():
     """Tally the 2014 survey answers stored in ``answers_2014_filename``.

     The file is read with ``csv.DictReader``.  Columns whose header is not one
     of the question wordings in ``questions_2014`` are ignored.

     Returns:
         A dict mapping every question id of ``questions_2014`` to a
         ``Counter`` of ``{answer text: number of rows}``.  For the ids listed
         in ``multi_choices_questions`` a cell is split on commas and every
         stripped choice is counted on its own.
     """
     phrase_to_question = {phrase: q for q, phrase in questions_2014.items()}
     answers = {q: Counter() for q in questions_2014}
     # newline="" hands newline handling over to the csv module, as its
     # documentation requires, so quoted cells containing line breaks are
     # parsed correctly.
     with open(answers_2014_filename, "r", newline="") as handle:
         for row in csv.DictReader(handle):
             for phrase, answer in row.items():
                 q = phrase_to_question.get(phrase)
                 if q is None:
                     continue
                 # DictReader pads rows that are shorter than the header with
                 # None; count such a cell like an empty one instead of
                 # crashing on None.split().
                 if answer is None:
                     answer = ""
                 if q in multi_choices_questions:
                     answers[q].update(
                         choice.strip() for choice in answer.split(","))
                 else:
                     answers[q][answer] += 1
     return answers


 def save(fname, dpi=60):
     """Save via ``plt.savefig`` to ``../images/python-survey/`` + ``fname``.

     The folder must already exist.  ``dpi`` is passed through to
     ``plt.savefig``.
     """
     target = "../images/python-survey/" + fname
     plt.savefig(target, dpi=dpi)


 def deal_with_question(question_name, old, new_d):
     """
     Build a bar chart comparing the 2013 and 2014 answers to one question.

     The counts of each year are converted to percentages of the answers
     shown, drawn as side-by-side bars (2013 in red, 2014 in blue) and written
     out through ``save`` under ``question_name``.

     Example input:
     old = (4660, 119)
     new_d = {'Yes': 6424, 'No': 191}

     Args:
         question_name: question id; also used as the output file name.
         old: 2013 counts, in the same order as the labels chosen below.
         new_d: 2014 mapping of answer text to count.  It is not modified.

     Raises:
         ValueError: if the question is not "python3_mistake" and ``new_d``
             has neither a "Yes" nor a "Python 2.x" key.
     """
     if question_name == "python3_mistake":
         labels = ["Yes", "No, but...", "No"]
         # 2014 has a long "No, but ..." answer: expose it under a short
         # label.  Work on a copy so the caller's tallies stay untouched, and
         # fall back to 0 when nobody picked that answer.
         long_key = next(
             (k for k in new_d
              if isinstance(k, str) and k.startswith("No, but")),
             None)
         new_d = dict(new_d)
         new_d["No, but..."] = new_d.pop(long_key) if long_key is not None else 0
         # 2013 had no such answer, so its middle bar is empty.
         old = old[0], 0, old[1]
     elif "Yes" in new_d:
         # Yes/no
         labels = ["Yes", "No"]
     elif "Python 2.x" in new_d:
         # Python 2.x/Python 3.x
         labels = ["Python 2.x", "Python 3.x"]
     else:
         raise ValueError("Don't know those keys: %s" % list(new_d))
     new = [new_d.get(label, 0) for label in labels]

     # Normalise to percentages.  "or 1" avoids dividing by zero when a year
     # has no answers at all (every value is 0 in that case anyway).
     new_answer_count = sum(new) or 1
     new = tuple(100 * x / new_answer_count for x in new)
     old_answer_count = sum(old) or 1
     old = tuple(100 * x / old_answer_count for x in old)

     graph_indices = list(range(len(labels)))
     width = .35

     fig, ax = plt.subplots()
     # Bars are anchored on their left edge, so each 2013/2014 pair spans
     # [i, i + 2 * width] and is centred on the tick at i + width.  The
     # alignment is spelled out because matplotlib's default changed to
     # 'center' in 2.0.
     old_rects = ax.bar(graph_indices, old, width, color='r', align='edge')
     new_rects = ax.bar([i + width for i in graph_indices], new, width,
                        color='b', align='edge')

     ax.set_ylabel("%")
     ax.set_xticks([i + width for i in graph_indices])
     ax.set_xticklabels(labels)
     ax.legend((old_rects[0], new_rects[0]), ("2013", "2014"))
     ax.set_ylim([0, 100])

     try:
         save(question_name)
     finally:
         # pyplot keeps every figure alive until it is closed explicitly.
         plt.close(fig)


 def deal_with_new_question(question_name, new, relative=False):
     """Build a bar chart for a question that only exists in the 2014 survey.

     Args:
         question_name: question id; also used as the output file name.
         new: mapping ``{answer: count}``; bars are ordered by sorted answer.
         relative: when true, plot percentages of all answers (y axis fixed
             to 0-100) instead of raw counts.
     """
     pairs = sorted(new.items())
     # zip(*[]) yields nothing to unpack, so handle "no answers" explicitly.
     labels, values = zip(*pairs) if pairs else ((), ())
     if relative:
         # "or 1" avoids dividing by zero when every count is 0.
         total_answer_count = sum(values) or 1
         values = [100 * value / total_answer_count for value in values]

     graph_indices = list(range(len(labels)))

     width = .70
     fig, ax = plt.subplots()
     # Left-anchored bars put the tick at i + width / 2 under the bar centre.
     # The alignment is spelled out because matplotlib's default changed to
     # 'center' in 2.0.
     ax.bar(graph_indices, values, width, align='edge')
     ax.set_xticks([i + width / 2 for i in graph_indices])
     ax.set_xticklabels(labels)

     if relative:
         ax.set_ylabel("%")
         ax.set_ylim([0, 100])
     else:
         ax.set_ylabel("responses")

     try:
         save(question_name)
     finally:
         # pyplot keeps every figure alive until it is closed explicitly.
         plt.close(fig)


 def deal_with_blocking(new):
     """Chart the most frequent answers to the "blocking" question.

     Args:
         new: ``Counter`` of ``{answer: count}``.  The five most common
             entries are taken and falsy answers (e.g. the empty string) are
             then dropped.
     """
     # NOTE(review): the blank answer is filtered out *after* the top-5 cut,
     # so fewer than five bars may be drawn -- confirm this is intended.
     big_problems = [(p, count) for p, count in new.most_common(5) if p]
     # zip(*[]) yields nothing to unpack, so handle "no answers" explicitly.
     labels, values = zip(*big_problems) if big_problems else ((), ())
     graph_indices = list(range(len(labels)))
     fig, ax = plt.subplots()
     width = .30
     # Left-anchored bars put the tick at i + width / 2 under the bar centre.
     # The alignment is spelled out because matplotlib's default changed to
     # 'center' in 2.0.
     ax.bar(graph_indices, values, width, align='edge')

     ax.set_ylabel("respondents")
     ax.set_xticks([i + width / 2 for i in graph_indices])
     ax.set_xticklabels(labels, rotation=45)
     if values:
         # most_common() sorts by descending count, so values[0] is the
         # tallest bar; leave 10% headroom above it.
         ax.set_ylim([0, values[0] * 1.1])

     try:
         save("blocking", dpi=80)
     finally:
         # pyplot keeps every figure alive until it is closed explicitly.
         plt.close(fig)


 # Build the charts only when run as a script, not when imported.
 if __name__ == "__main__":
     build_graphs()
	import csv
	from collections import Counter
	from operator import itemgetter
	from matplotlib import rcParams
	import matplotlib.pyplot as plt

	# Switch on matplotlib's automatic figure layout for every chart built below.
	rcParams["figure.autolayout"] = True


	# 2013 survey tallies: one pair of counts per question id.
	# NOTE(review): the pairs look like (yes, no) counts, and presumably
	# (Python 2.x, Python 3.x) for "more_python2_or_python3" -- confirm.
	answers_2013 = {
		"written_python2": (4660, 119),
		"written_python3": (2862, 1908),
		"more_python2_or_python3": (3692, 1036),
		"python3_mistake": (1071, 3603),
		"dependencies_python3": (2883, 1808),
		"ported_2_to_3": (1554, 3179),
		"used_2to3": (768, 3953),
		"used_3to2": (91, 4627),
		"polyglot": (1473, 3261),
	}

	# 2014 question ids mapped to the full wording of each question.
	questions_2014 = {
		"written_python2": "Have you ever written code in Python 2.x?",
		"written_python3": "Have you ever written code in Python 3.x?",
		"more_python2_or_python3": "Do you currently write more code in Python 2.x or Python 3.x?",
		"personal_choice": "When starting a personal project, which Python version do you use?",
		"versions": "Which Python versions do you regularly use?",
		"python3_mistake": "Do you think Python 3.x was a mistake?",
		"ported_2_to_3": "Have you ever ported code from Python 2.x to Python 3.x?",
		"used_2to3": "Have you ever written/ported code using 2to3?",
		"used_3to2": "Have you ever written/ported code using 3to2?",
		"polyglot": "Have you ever written/ported code to run on Python 2.x and Python 3.x unmodified?",
		"blocking": "If applicable, what keeps you from leaving python 2 for python 3?",
	}
	# CSV file holding the raw 2014 answers.
	answers_2014_filename = "survey_answers.csv"


	# Question ids present in both years, and those that only exist in 2014.
	common_questions = set(answers_2013).intersection(questions_2014)
	new_questions = set(questions_2014).difference(answers_2013)
	# Question ids flagged as multiple-choice; both are new in 2014.
	multi_choices_questions = {"blocking", "versions"}


	def build_graphs():
		"""Parse the 2014 answers and render one chart per survey question.

		Questions asked in both years get a 2013-vs-2014 comparison chart; the
		"versions", "personal_choice" and "blocking" questions get their own
		single-year charts.
		"""
		answers_2014 = parse_answers()

		# deal with common questions
		for q in common_questions:
			deal_with_question(q, answers_2013[q], answers_2014[q])

		# Drop the first len("Python ") characters of each version label.
		prefix_len = len("Python ")
		perso = {q[prefix_len:]: v for q, v in answers_2014["versions"].items()}
		deal_with_new_question("versions", perso)

		deal_with_new_question("personal_choice", answers_2014["personal_choice"],
			relative=True)

		deal_with_blocking(answers_2014["blocking"])


	def parse_answers():
		"""Tally the 2014 survey answers stored in ``answers_2014_filename``.

		The file is read with ``csv.DictReader``.  Columns whose header is not
		one of the question wordings in ``questions_2014`` are ignored.

		Returns:
			A dict mapping every question id of ``questions_2014`` to a
			``Counter`` of ``{answer text: number of rows}``.  For the ids
			listed in ``multi_choices_questions`` a cell is split on commas
			and every stripped choice is counted on its own.
		"""
		phrase_to_question = {phrase: q for q, phrase in questions_2014.items()}
		answers = {q: Counter() for q in questions_2014}
		# newline="" hands newline handling over to the csv module, as its
		# documentation requires, so quoted cells containing line breaks are
		# parsed correctly.
		with open(answers_2014_filename, "r", newline="") as handle:
			for row in csv.DictReader(handle):
				for phrase, answer in row.items():
					q = phrase_to_question.get(phrase)
					if q is None:
						continue
					# DictReader pads rows that are shorter than the header
					# with None; count such a cell like an empty one instead
					# of crashing on None.split().
					if answer is None:
						answer = ""
					if q in multi_choices_questions:
						answers[q].update(
							choice.strip() for choice in answer.split(","))
					else:
						answers[q][answer] += 1
		return answers


	def save(fname, dpi=60):
		"""Save via ``plt.savefig`` to ``../images/python-survey/`` + ``fname``.

		The folder must already exist.  ``dpi`` is passed through to
		``plt.savefig``.
		"""
		plt.savefig("../images/python-survey/" + fname, dpi=dpi)


	def deal_with_question(question_name, old, new_d):
		"""
		Build a bar chart comparing the 2013 and 2014 answers to one question.

		The counts of each year are converted to percentages of the answers
		shown, drawn as side-by-side bars (2013 in red, 2014 in blue) and
		written out through ``save`` under ``question_name``.

		Example input:
		old = (4660, 119)
		new_d = {'Yes': 6424, 'No': 191}

		Args:
			question_name: question id; also used as the output file name.
			old: 2013 counts, in the same order as the labels chosen below.
			new_d: 2014 mapping of answer text to count.  It is not modified.

		Raises:
			ValueError: if the question is not "python3_mistake" and
				``new_d`` has neither a "Yes" nor a "Python 2.x" key.
		"""
		if question_name == "python3_mistake":
			labels = ["Yes", "No, but...", "No"]
			# 2014 has a long "No, but ..." answer: expose it under a short
			# label.  Work on a copy so the caller's tallies stay untouched,
			# and fall back to 0 when nobody picked that answer.
			long_key = next(
				(k for k in new_d
					if isinstance(k, str) and k.startswith("No, but")),
				None)
			new_d = dict(new_d)
			new_d["No, but..."] = new_d.pop(long_key) if long_key is not None else 0
			# 2013 had no such answer, so its middle bar is empty.
			old = old[0], 0, old[1]
		elif "Yes" in new_d:
			# Yes/no
			labels = ["Yes", "No"]
		elif "Python 2.x" in new_d:
			# Python 2.x/Python 3.x
			labels = ["Python 2.x", "Python 3.x"]
		else:
			raise ValueError("Don't know those keys: %s" % list(new_d))
		new = [new_d.get(label, 0) for label in labels]

		# Normalise to percentages.  "or 1" avoids dividing by zero when a
		# year has no answers at all (every value is 0 in that case anyway).
		new_answer_count = sum(new) or 1
		new = tuple(100 * x / new_answer_count for x in new)
		old_answer_count = sum(old) or 1
		old = tuple(100 * x / old_answer_count for x in old)

		graph_indices = list(range(len(labels)))
		width = .35

		fig, ax = plt.subplots()
		# Bars are anchored on their left edge, so each 2013/2014 pair spans
		# [i, i + 2 * width] and is centred on the tick at i + width.  The
		# alignment is spelled out because matplotlib's default changed to
		# 'center' in 2.0.
		old_rects = ax.bar(graph_indices, old, width, color='r', align='edge')
		new_rects = ax.bar([i + width for i in graph_indices], new, width,
			color='b', align='edge')

		ax.set_ylabel("%")
		ax.set_xticks([i + width for i in graph_indices])
		ax.set_xticklabels(labels)
		ax.legend((old_rects[0], new_rects[0]), ("2013", "2014"))
		ax.set_ylim([0, 100])

		try:
			save(question_name)
		finally:
			# pyplot keeps every figure alive until it is closed explicitly.
			plt.close(fig)


	def deal_with_new_question(question_name, new, relative=False):
		"""Build a bar chart for a question that only exists in the 2014 survey.

		Args:
			question_name: question id; also used as the output file name.
			new: mapping ``{answer: count}``; bars are ordered by sorted
				answer.
			relative: when true, plot percentages of all answers (y axis
				fixed to 0-100) instead of raw counts.
		"""
		pairs = sorted(new.items())
		# zip(*[]) yields nothing to unpack, so handle "no answers" explicitly.
		labels, values = zip(*pairs) if pairs else ((), ())
		if relative:
			# "or 1" avoids dividing by zero when every count is 0.
			total_answer_count = sum(values) or 1
			values = [100 * value / total_answer_count for value in values]

		graph_indices = list(range(len(labels)))

		width = .70
		fig, ax = plt.subplots()
		# Left-anchored bars put the tick at i + width / 2 under the bar
		# centre.  The alignment is spelled out because matplotlib's default
		# changed to 'center' in 2.0.
		ax.bar(graph_indices, values, width, align='edge')
		ax.set_xticks([i + width / 2 for i in graph_indices])
		ax.set_xticklabels(labels)

		if relative:
			ax.set_ylabel("%")
			ax.set_ylim([0, 100])
		else:
			ax.set_ylabel("responses")

		try:
			save(question_name)
		finally:
			# pyplot keeps every figure alive until it is closed explicitly.
			plt.close(fig)


	def deal_with_blocking(new):
		"""Chart the most frequent answers to the "blocking" question.

		Args:
			new: ``Counter`` of ``{answer: count}``.  The five most common
				entries are taken and falsy answers (e.g. the empty string)
				are then dropped.
		"""
		# NOTE(review): the blank answer is filtered out *after* the top-5
		# cut, so fewer than five bars may be drawn -- confirm this is intended.
		big_problems = [(p, count) for p, count in new.most_common(5) if p]
		# zip(*[]) yields nothing to unpack, so handle "no answers" explicitly.
		labels, values = zip(*big_problems) if big_problems else ((), ())
		graph_indices = list(range(len(labels)))
		fig, ax = plt.subplots()
		width = .30
		# Left-anchored bars put the tick at i + width / 2 under the bar
		# centre.  The alignment is spelled out because matplotlib's default
		# changed to 'center' in 2.0.
		ax.bar(graph_indices, values, width, align='edge')

		ax.set_ylabel("respondents")
		ax.set_xticks([i + width / 2 for i in graph_indices])
		ax.set_xticklabels(labels, rotation=45)
		if values:
			# most_common() sorts by descending count, so values[0] is the
			# tallest bar; leave 10% headroom above it.
			ax.set_ylim([0, values[0] * 1.1])

		try:
			save("blocking", dpi=80)
		finally:
			# pyplot keeps every figure alive until it is closed explicitly.
			plt.close(fig)


	# Build the charts only when run as a script, not when imported.
	if __name__ == '__main__':
		build_graphs()