# How to procrastinate writing your thesis
import os
import pandas as pd
import time
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
# Maps every tracked file (path relative to the thesis root) to the category
# its words are counted under. main() looks up each remaining file path here,
# so a file that is neither filtered out nor listed raises a KeyError.
label_dict = {
    "0_preface/abstract.tex": "thesis noncontent",
    "0_preface/abstractGerman.tex": "thesis noncontent",
    "0_preface/acknowledgments.tex": "thesis noncontent",
    "1_introduction/introduction.tex": "thesis content",
    "2_papers/paper1_SceneWalk/Supplement.tex": "paper",
    "2_papers/paper1_SceneWalk/main.tex": "paper",
    "2_papers/paper2_AntWalk/main.tex": "paper",
    "2_papers/paper2_AntWalk/coverpage.tex": "paper",
    "2_papers/paper1_SceneWalk/coverpage.tex": "paper",
    "2_papers/papers.tex": "thesis content",
    "3_discussion/discussion.tex": "thesis content",
    "basic_info.tex": "latex",
    "format/layout.tex": "latex",
    "format/print_info.tex": "latex",
    "format/title_page.tex": "latex",
    "main.tex": "latex",
}

# Font sizes used by the small per-chapter panels.
YLAB = 15  # y-axis label
TITLE = 15  # panel title

# Shared palette: COLORS[0] and COLORS[1] shade the bands of the overview
# plot; COLORS[1], COLORS[2] and COLORS[3] colour the figure, citation and
# '?' lines respectively.
COLORS = ["orange", "darkgreen", "seagreen", "teal", "limegreen"]
def plot_chapter_intro(ax, chap, name):
    """Draw a chapter's word count over time on ``ax``.

    ``chap`` is handed to seaborn as ``data`` and must provide the columns
    ``date`` and ``total_words``. ``name`` becomes the panel title.
    Returns ``ax``.
    """
    line_style = {"color": "black", "linewidth": 3}
    sns.lineplot(x="date", y="total_words", data=chap, ax=ax, **line_style)
    ax.set_ylabel("# Words", fontsize=YLAB)
    ax.set_title(f"{name}", fontsize=TITLE)
    return ax
def plot_figs(ax, intro, lab):
    """Draw the ``figs`` column of ``intro`` against ``date`` on ``ax``.

    ``lab`` is the chapter name inserted into the panel title.
    Returns ``ax``.
    """
    sns.lineplot(
        x="date",
        y="figs",
        data=intro,
        color=COLORS[1],
        linewidth=4,
        ax=ax,
    )
    ax.set_ylabel("# Figures", fontsize=YLAB)
    ax.set_title(f"# Figures in {lab}", fontsize=TITLE)
    return ax
def plot_cites(ax, intro, lab):
    """Draw the ``cites`` column of ``intro`` against ``date`` on ``ax``.

    ``lab`` is the chapter name inserted into the panel title.
    Returns ``ax``.
    """
    sns.lineplot(
        x="date",
        y="cites",
        data=intro,
        color=COLORS[2],
        linewidth=4,
        ax=ax,
    )
    ax.set_ylabel("# Citations", fontsize=YLAB)
    ax.set_title(f"# Citations in {lab}", fontsize=TITLE)
    return ax
def plot_qs(ax, intro, lab):
    """Draw the ``qs`` column of ``intro`` against ``date`` on ``ax``.

    ``lab`` is the chapter name inserted into the panel title.
    Returns ``ax``.
    """
    sns.lineplot(
        x="date",
        y="qs",
        data=intro,
        color=COLORS[3],
        linewidth=4,
        ax=ax,
    )
    ax.set_ylabel("# '?'", fontsize=YLAB)
    ax.set_title(f"# '?' in {lab}", fontsize=TITLE)
    return ax
def set_style(ax):
    """Rotate the x-axis tick labels of ``ax`` by 45 degrees; return ``ax``."""
    rotation_degrees = 45
    ax.tick_params(labelrotation=rotation_degrees, axis="x")
    return ax
def plot_n_words_total(ax, df_total, paper_words, bla_words, ymin=20000):
    """Draw the overall word count over time with shaded reference bands.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    df_total : table with ``date`` and ``total_words`` columns, passed to
        seaborn as ``data``.
    paper_words : upper edge of the first band, which spans 0 to
        ``paper_words`` and is shaded in ``COLORS[0]``.
    bla_words : height of the second band, which spans ``paper_words`` to
        ``paper_words + bla_words`` and is shaded in ``COLORS[1]``.
    ymin : lower y-axis limit. Defaults to 20000, the value that used to be
        hard-coded.

    Returns
    -------
    The same ``ax``.
    """
    sns.lineplot(
        data=df_total,
        x="date",
        y="total_words",
        ax=ax,
        color="black",
        linewidth=3,
    )

    # Horizontal bands marking how much of the total is paper text and
    # boilerplate.
    ax.axhspan(0, paper_words, facecolor=COLORS[0], alpha=0.1)
    ax.axhspan(
        paper_words, paper_words + bla_words, facecolor=COLORS[1], alpha=0.1
    )

    ax.set_title("Words in my PhD Dissertation", fontsize=20)
    ax.set_ylabel("# Words")
    # Only the lower limit is fixed; the upper limit stays automatic.
    ax.set_ylim(bottom=ymin)

    # Band captions are positioned in axes-fraction coordinates so they stay
    # put regardless of the data range.
    ax.annotate(
        "Words in the papers",
        [0.05, 0.2],
        color=COLORS[0],
        xycoords='axes fraction',
        fontsize=15,
    )
    ax.annotate(
        "boilerplate",
        [0.05, 0.45],
        color=COLORS[1],
        xycoords='axes fraction',
        fontsize=15,
    )
    ax.annotate(
        "Thesis Text",
        [0.05, 0.7],
        color="black",
        xycoords='axes fraction',
        fontsize=15,
    )
    return ax
def get_file_ending(s):
    """Return the extension of file name ``s`` without the dot, or ``None``.

    ``None`` is returned when the name contains no dot (``"latexmkrc"``) or
    when it starts with a dot (``".gitignore"``), i.e. for names that have no
    real extension. Otherwise the text after the last dot is returned, so
    ``"a.b.tex"`` yields ``"tex"``.
    """
    parts = s.split(".")
    # No dot at all, or a leading dot (hidden file): treat as "no ending".
    if len(parts) < 2 or parts[0] == "":
        return None
    # Use the last segment so names with several dots report the true ending.
    return parts[-1]
def _load_tracking_table(csv_path):
    """Read the tracking CSV and return the filtered, labelled rows."""
    columns = [
        "date", "filepath", "words", "lines", "figs", "cites", "qs",
        "words_in_text", "words_in_headers", "words_in_captions",
        "n_headers", "n_tab_fig", "n_math_inline", "n_math_disp", "end",
    ]
    df = pd.read_csv(csv_path, header=None, names=columns)
    df["filepath"] = df["filepath"].str.strip()
    df = df[df["filepath"] != ".gitignore"].reset_index(drop=True)
    df["date"] = pd.to_datetime(df["date"])

    # Split every file path into directory, file name and file ending.
    dirs_and_files = [os.path.split(f) for f in df["filepath"]]
    df["path"] = [directory for directory, _ in dirs_and_files]
    df["file"] = [file_name for _, file_name in dirs_and_files]
    df["ending"] = [get_file_ending(file_name) for _, file_name in dirs_and_files]
    # Files in the repository root have an empty directory part.
    df.loc[df.path == "", "path"] = "mainfiles"

    # Drop everything that is not thesis text: binary and auxiliary files,
    # the latexmk configuration and the reviewed paper copies.
    skipped_endings = [
        "pdf", "txt", "ldf", "apc", "cls", "bst", "png", "sty", "bib", "jpg",
        "latexmkrc",
    ]
    skipped_filepaths = [
        '2_papers/paper2_AntWalk/reviewed_1.tex',
        '2_papers/paper2_AntWalk/reviewed_2.tex',
    ]
    skip = (
        df["ending"].isin(skipped_endings)
        | (df["file"] == "latexmkrc")
        | df["filepath"].isin(skipped_filepaths)
    )
    df = df[~skip].reset_index(drop=True)

    # Every remaining file needs a category; fail with the full list of
    # offenders instead of only the first missing key.
    unlabelled = sorted(set(df["filepath"]) - set(label_dict))
    if unlabelled:
        raise KeyError(f"no entry in label_dict for: {unlabelled}")
    df["labels"] = df["filepath"].map(label_dict)

    df["total_words"] = (
        df["words_in_text"] + df["words_in_headers"] + df["words_in_captions"]
    )
    return df


def _plot_chapter_panels(fig, grid, chapter, chapter_name):
    """Fill a 2x3 grid: wide word-count panel on top, three counters below."""
    plot_chapter_intro(
        fig.add_subplot(grid[0, 0:]), chapter, f"# Words in {chapter_name}"
    )
    plot_figs(fig.add_subplot(grid[1, 0]), chapter, chapter_name)
    plot_cites(fig.add_subplot(grid[1, 1]), chapter, chapter_name)
    plot_qs(fig.add_subplot(grid[1, 2]), chapter, chapter_name)


def main(
    csv_path="/Users/lisa/Documents/SFB1294_B05/Dissertation_tracking/XXX/table.csv",
    output_paths=("asd2.png", "words_in_diss.png"),
):
    """Plot the thesis writing progress and save the figure.

    Parameters
    ----------
    csv_path : header-less tracking CSV with one row per date and file.
        Defaults to the path that used to be hard-coded.
    output_paths : file names the finished figure is written to. Defaults to
        the two names that used to be hard-coded.

    Raises
    ------
    KeyError
        If a file that survives filtering has no entry in ``label_dict``.
    """
    df = _load_tracking_table(csv_path)

    df_by_lab = df.groupby(["date", "labels"]).total_words.sum().reset_index()
    # First distinct per-date total of the "paper" category.
    paper_words = (
        df_by_lab.loc[df_by_lab.labels == "paper"].total_words.unique()[0]
    )
    # Largest per-date totals of the two boilerplate categories, added up.
    bla_words = (
        df_by_lab.loc[df_by_lab.labels == "thesis noncontent"]
        .total_words.unique().max()
        + df_by_lab.loc[df_by_lab.labels == "latex"].total_words.unique().max()
    )
    df_total = df.groupby(["date"]).total_words.sum().reset_index()

    intro = df.loc[df["path"] == "1_introduction"]
    disc = df.loc[df["path"] == "3_discussion"]

    # Layout: overview panel on top (1/3 of the height), below it one 2x3
    # grid per chapter, side by side.
    fig = plt.figure(figsize=(20, 20))
    gs0 = gridspec.GridSpec(
        2, 1, left=0.05, right=1, wspace=0.1, hspace=0.3, height_ratios=[1, 2]
    )
    gs1 = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=gs0[0])
    gs2 = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs0[1])
    gs21 = gridspec.GridSpecFromSubplotSpec(
        2, 3, subplot_spec=gs2[0], hspace=0.4, wspace=0.4
    )
    gs22 = gridspec.GridSpecFromSubplotSpec(
        2, 3, subplot_spec=gs2[1], hspace=0.4, wspace=0.4
    )

    plot_n_words_total(fig.add_subplot(gs1[0]), df_total, paper_words, bla_words)
    _plot_chapter_panels(fig, gs21, intro, "Introduction")
    _plot_chapter_panels(fig, gs22, disc, "Discussion")

    for axis in fig.axes:
        set_style(axis)

    for output_path in output_paths:
        fig.savefig(output_path, facecolor='white', transparent=False)
# Run the analysis only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
