Skip to content

Instantly share code, notes, and snippets.

@angersa
Forked from JnyJny/grade_csv.py
Last active February 9, 2020 21:49
Show Gist options
  • Save angersa/7640ff30e6928337d902ed4cc88801fd to your computer and use it in GitHub Desktop.
Save angersa/7640ff30e6928337d902ed4cc88801fd to your computer and use it in GitHub Desktop.
Compile Student Grades from CSV Data
#!/usr/bin/env python3
import click
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Requirements:
# python3 -m pip install -U click pandas matplotlib numpy xlwt
class Grade:
"""The Grade class exists to simplify translating a floating-point value
to a traditional letter grade. The variables 'bin' and 'cotes' are
class scope, which mean they are created the first time the class
is used and not recreated. This is a small space/time optimization.
The only method defined is a classmethod, 'to_cote'.
Our other choice was to make bins and cotes global variables,
and write a function as in the original source. I will go to
great lengths to avoid global variables, they tend to cause
far more problems than they solve.
"""
bins = [0, 34, 49, 53, 56, 59, 64, 69, 72, 76, 79, 84, 89, 100.5]
cotes = ["F", "E", "D", "D+", "C-", "C", "C+", "B-", "B", "B+", "A-", "A", "A+"]
@classmethod
def to_cote(cls, total: float) -> str:
"""Given a floating point `total`, return a corresponding letter
grade.
"""
index = np.digitize(total, cls.bins) - 1
return cls.cotes[index]
def __init__(self):
raise NotImplementedError("Grade not implemented")
def read_grades(filename: str) -> pd.DataFrame:
"""Reads the contents of the given CSV file into a pandas.DataFrame.
Three columns are added to the DataFrame: Total, Cote and `Prenom, Nom`.
Total is the sum of the values between columns 5 and 11, inclusive.
Cote is the letter grade representation of Total
`Prenom, Nom` is a concatenation of Prenom and Nom columns.
"""
# Pandas has a very robust CSV parser
df = pd.read_csv(filename)
# the next three lines create three new columns in the dataframe
# derived from values read from the file.
df["Total"] = df[df.columns[5:12]].T.sum()
df["Cote"] = df.Total.apply(Grade.to_cote)
df["Prenom, Nom"] = df.Prenom + ", " + df.Nom
return df
def plot_total_distribution(
df: pd.DataFrame, preview: bool = False, output_filename: str = None
) -> None:
"""Given a pandas.DataFrame with a Total column, generates a
histogram plot. If preview is True, matplotlib.pyplot.show() is
called to display it immediately. If an output_filename is
given, the plot is saved to a file with that name.
"""
# Pandas has a great interface to matplotlib that greatly
# simplifies creating plots. This plot isn't exactly like
# the one in your original source, but it's pretty close.
df.Total.plot.hist()
if output_filename:
print("saving plot to: ", output_filename)
plt.savefig(output_filename, bbox_inches="tight")
if preview:
print("Previewing, close window to continue...")
plt.show()
def write_summary_xlsx(df, output_filename: str) -> None:
"""Given a pandas.DataFrame with columns 'Prenom, Nom',
Matricule, and Cote, generates a summary XLSX spreadsheet
file and writes it to the given output_filename.
"""
fullname = "Prenom, Nom"
# here we create a summary DataFrame composed of three
# columns, "Prenom, Nom", Matricule and Cote.
summary = df[[fullname, "Matricule", "Cote"]]
# The next line changes the index of the dataframe to
# the fullname column and "drops" that column and modifies
# the current 'summary' dataframe instead of returning a
# new modified Dataframe (the pandas default mode of operation).
summary.set_index(fullname, drop=True, inplace=True)
# Ok this is magic, but the good kind. It's simple.
if output_filename.endswith(".xlsx"):
summary.to_excel(output_filename)
return
writer = pd.ExcelWriter(output_filename)
summary.to_excel(writer)
writer.save()
@click.command()
@click.argument("input-file", type=click.Path(readable=True))
@click.option(
"-p",
"--output-plot",
type=click.Path(exists=False),
default=None,
help="Grade distribution graph in PNG format.",
)
@click.option("-P", "--preview-plot", is_flag=True, default=False)
@click.option("-x", "--output-xls", type=click.Path(), default=None)
def main_cli(input_file, output_plot, preview_plot, output_xls):
"""Processes the student grade data from a CSV file,
optionally generates a grade distribution plot with preview,
and optionally writes student grade data to an Excel spreadsheet.
"""
# step one, read student data
df = read_grades(input_file)
# the pandas.DataFrame.describe function is essentially
# your print_stats function.
print(df.Total.describe())
# if the user specifies output_xls, the student data
# is summarized and written to an Excel file.
if output_xls:
write_summary_xlsx(df, output_xls)
print("wrote XLS summary to: ", output_xls)
plot_total_distribution(df, preview_plot, output_plot)
if __name__ == "__main__":
main_cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment