-
-
Save angersa/7640ff30e6928337d902ed4cc88801fd to your computer and use it in GitHub Desktop.
Compile Student Grades from CSV Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import click | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
# Requirements: | |
# python3 -m pip install -U click pandas matplotlib numpy xlwt | |
class Grade: | |
"""The Grade class exists to simplify translating a floating-point value | |
to a traditional letter grade. The variables 'bin' and 'cotes' are | |
class scope, which mean they are created the first time the class | |
is used and not recreated. This is a small space/time optimization. | |
The only method defined is a classmethod, 'to_cote'. | |
Our other choice was to make bins and cotes global variables, | |
and write a function as in the original source. I will go to | |
great lengths to avoid global variables, they tend to cause | |
far more problems than they solve. | |
""" | |
bins = [0, 34, 49, 53, 56, 59, 64, 69, 72, 76, 79, 84, 89, 100.5] | |
cotes = ["F", "E", "D", "D+", "C-", "C", "C+", "B-", "B", "B+", "A-", "A", "A+"] | |
@classmethod | |
def to_cote(cls, total: float) -> str: | |
"""Given a floating point `total`, return a corresponding letter | |
grade. | |
""" | |
index = np.digitize(total, cls.bins) - 1 | |
return cls.cotes[index] | |
def __init__(self): | |
raise NotImplementedError("Grade not implemented") | |
def read_grades(filename: str) -> pd.DataFrame: | |
"""Reads the contents of the given CSV file into a pandas.DataFrame. | |
Three columns are added to the DataFrame: Total, Cote and `Prenom, Nom`. | |
Total is the sum of the values between columns 5 and 11, inclusive. | |
Cote is the letter grade representation of Total | |
`Prenom, Nom` is a concatenation of Prenom and Nom columns. | |
""" | |
# Pandas has a very robust CSV parser | |
df = pd.read_csv(filename) | |
# the next three lines create three new columns in the dataframe | |
# derived from values read from the file. | |
df["Total"] = df[df.columns[5:12]].T.sum() | |
df["Cote"] = df.Total.apply(Grade.to_cote) | |
df["Prenom, Nom"] = df.Prenom + ", " + df.Nom | |
return df | |
def plot_total_distribution( | |
df: pd.DataFrame, preview: bool = False, output_filename: str = None | |
) -> None: | |
"""Given a pandas.DataFrame with a Total column, generates a | |
histogram plot. If preview is True, matplotlib.pyplot.show() is | |
called to display it immediately. If an output_filename is | |
given, the plot is saved to a file with that name. | |
""" | |
# Pandas has a great interface to matplotlib that greatly | |
# simplifies creating plots. This plot isn't exactly like | |
# the one in your original source, but it's pretty close. | |
df.Total.plot.hist() | |
if output_filename: | |
print("saving plot to: ", output_filename) | |
plt.savefig(output_filename, bbox_inches="tight") | |
if preview: | |
print("Previewing, close window to continue...") | |
plt.show() | |
def write_summary_xlsx(df, output_filename: str) -> None: | |
"""Given a pandas.DataFrame with columns 'Prenom, Nom', | |
Matricule, and Cote, generates a summary XLSX spreadsheet | |
file and writes it to the given output_filename. | |
""" | |
fullname = "Prenom, Nom" | |
# here we create a summary DataFrame composed of three | |
# columns, "Prenom, Nom", Matricule and Cote. | |
summary = df[[fullname, "Matricule", "Cote"]] | |
# The next line changes the index of the dataframe to | |
# the fullname column and "drops" that column and modifies | |
# the current 'summary' dataframe instead of returning a | |
# new modified Dataframe (the pandas default mode of operation). | |
summary.set_index(fullname, drop=True, inplace=True) | |
# Ok this is magic, but the good kind. It's simple. | |
if output_filename.endswith(".xlsx"): | |
summary.to_excel(output_filename) | |
return | |
writer = pd.ExcelWriter(output_filename) | |
summary.to_excel(writer) | |
writer.save() | |
@click.command() | |
@click.argument("input-file", type=click.Path(readable=True)) | |
@click.option( | |
"-p", | |
"--output-plot", | |
type=click.Path(exists=False), | |
default=None, | |
help="Grade distribution graph in PNG format.", | |
) | |
@click.option("-P", "--preview-plot", is_flag=True, default=False) | |
@click.option("-x", "--output-xls", type=click.Path(), default=None) | |
def main_cli(input_file, output_plot, preview_plot, output_xls): | |
"""Processes the student grade data from a CSV file, | |
optionally generates a grade distribution plot with preview, | |
and optionally writes student grade data to an Excel spreadsheet. | |
""" | |
# step one, read student data | |
df = read_grades(input_file) | |
# the pandas.DataFrame.describe function is essentially | |
# your print_stats function. | |
print(df.Total.describe()) | |
# if the user specifies output_xls, the student data | |
# is summarized and written to an Excel file. | |
if output_xls: | |
write_summary_xlsx(df, output_xls) | |
print("wrote XLS summary to: ", output_xls) | |
plot_total_distribution(df, preview_plot, output_plot) | |
if __name__ == "__main__": | |
main_cli() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment