Skip to content

Instantly share code, notes, and snippets.

@lschwetlick
Last active January 6, 2023 18:47
Show Gist options
  • Save lschwetlick/d8d334d18044986eb7fb56d4b67d7f44 to your computer and use it in GitHub Desktop.
Save lschwetlick/d8d334d18044986eb7fb56d4b67d7f44 to your computer and use it in GitHub Desktop.
How to procrastinate writing your thesis
import os
import pandas as pd
import time
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.gridspec as gridspec
# Maps every tracked file path to the category its words are counted under.
# Paths are looked up exactly as they appear in the "filepath" column.
label_dict = {
    # front matter
    "0_preface/abstract.tex": "thesis noncontent",
    "0_preface/abstractGerman.tex": "thesis noncontent",
    "0_preface/acknowledgments.tex": "thesis noncontent",
    # introduction
    "1_introduction/introduction.tex": "thesis content",
    # included papers
    "2_papers/paper1_SceneWalk/Supplement.tex": "paper",
    "2_papers/paper1_SceneWalk/main.tex": "paper",
    "2_papers/paper2_AntWalk/main.tex": "paper",
    "2_papers/paper2_AntWalk/coverpage.tex": "paper",
    "2_papers/paper1_SceneWalk/coverpage.tex": "paper",
    "2_papers/papers.tex": "thesis content",
    # discussion
    "3_discussion/discussion.tex": "thesis content",
    # LaTeX scaffolding
    "basic_info.tex": "latex",
    "format/layout.tex": "latex",
    "format/print_info.tex": "latex",
    "format/title_page.tex": "latex",
    "main.tex": "latex",
}

# Font sizes used for y-axis labels and panel titles.
YLAB = 15
TITLE = 15

# Colour palette shared by the plotting helpers (indexed by position).
COLORS = [
    "orange",
    "darkgreen",
    "seagreen",
    "teal",
    "limegreen",
]
def plot_chapter_intro(ax, chap, name):
    """Draw the ``total_words`` column of ``chap`` over ``date`` on ``ax``.

    The curve is a black line of width 3. ``name`` is formatted into the
    panel title and the y-axis is labelled "# Words". The axes object that
    was passed in is returned.
    """
    line_style = {"color": "black", "linewidth": 3}
    sns.lineplot(x="date", y="total_words", data=chap, ax=ax, **line_style)
    ax.set_ylabel("# Words", fontsize=YLAB)
    ax.set_title(f"{name}", fontsize=TITLE)
    return ax
def plot_figs(ax, intro, lab):
    """Draw the ``figs`` column of ``intro`` over ``date`` on ``ax``.

    ``lab`` is interpolated into the panel title. Returns ``ax``.
    """
    sns.lineplot(
        data=intro,
        x="date",
        y="figs",
        ax=ax,
        color=COLORS[1],
        linewidth=4,
    )
    ax.set_ylabel("# Figures", fontsize=YLAB)
    ax.set_title(f"# Figures in {lab}", fontsize=TITLE)
    return ax
def plot_cites(ax, intro, lab):
    """Draw the ``cites`` column of ``intro`` over ``date`` on ``ax``.

    ``lab`` is interpolated into the panel title. Returns ``ax``.
    """
    curve_color = COLORS[2]
    sns.lineplot(
        data=intro, x="date", y="cites", ax=ax, color=curve_color, linewidth=4
    )
    title_text = f"# Citations in {lab}"
    ax.set_title(title_text, fontsize=TITLE)
    ax.set_ylabel("# Citations", fontsize=YLAB)
    return ax
def plot_qs(ax, intro, lab):
    """Draw the ``qs`` column of ``intro`` over ``date`` on ``ax``.

    The title and y-axis label refer to this quantity as the number of '?'.
    ``lab`` is interpolated into the panel title. Returns ``ax``.
    """
    plot_options = dict(x="date", y="qs", color=COLORS[3], linewidth=4)
    sns.lineplot(data=intro, ax=ax, **plot_options)
    ax.set_ylabel("# '?'", fontsize=YLAB)
    ax.set_title(f"# '?' in {lab}", fontsize=TITLE)
    return ax
def set_style(ax):
    """Apply the shared look to one axes object and return it.

    Hides the top and right spines, rotates the x tick labels by 45 degrees
    and clears the x-axis label.
    """
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis="x", labelrotation=45)
    ax.set_xlabel("")
    return ax
def plot_n_words_total(ax, df_total, paper_words, bla_words, ymin=20000):
    """Draw the overall word-count curve with shaded background bands.

    Parameters:
        ax: axes object to draw on.
        df_total: table with ``date`` and ``total_words`` columns.
        paper_words: upper edge of the first shaded band, which starts at 0.
        bla_words: height of the second shaded band, stacked on the first.
        ymin: lower y-axis limit. Defaults to 20000, the value that was
            previously hard-coded; ``None`` leaves matplotlib's limit alone.

    Returns:
        The same ``ax`` that was passed in.
    """
    sns.lineplot(
        data=df_total, x="date", y="total_words", ax=ax, color="black", linewidth=3
    )

    # Two stacked horizontal bands: [0, paper_words] and the bla_words above it.
    ax.axhspan(0, paper_words, facecolor=COLORS[0], alpha=0.1)
    ax.axhspan(paper_words, paper_words + bla_words, facecolor=COLORS[1], alpha=0.1)

    ax.set_title("Words in my PhD Dissertation", fontsize=20)
    ax.set_xlabel("Date")
    ax.set_ylabel("# Words")
    # Only the lower limit is set; the upper limit stays as matplotlib chose it.
    ax.set_ylim(ymin)

    # Text labels are positioned in axes-fraction coordinates, so they do not
    # move with the data or with ``ymin``.
    band_labels = (
        ("Words in the papers", 0.2, COLORS[0]),
        ("boilerplate", 0.45, COLORS[1]),
        ("Thesis Text", 0.7, "black"),
    )
    for text, y_frac, color in band_labels:
        ax.annotate(
            text, [0.05, y_frac], color=color, xycoords='axes fraction', fontsize=15
        )
    return ax
def get_file_ending(s):
    """Return the extension of file name ``s`` without the dot.

    The extension is the text after the LAST dot, so ``"a.tar.gz"`` yields
    ``"gz"``. Returns ``None`` when ``s`` contains no dot or starts with a
    dot (e.g. ``".gitignore"``). A trailing dot yields the empty string.
    """
    _, dot, ending = s.rpartition(".")
    # No dot at all, or a leading-dot name: treated as having no extension.
    if not dot or s.startswith("."):
        return None
    return ending
# Table read when ``main()`` is called without arguments (the path the script
# was originally hard-wired to).
_DEFAULT_TABLE = "/Users/lisa/Documents/SFB1294_B05/Dissertation_tracking/XXX/table.csv"

# Column names assigned to the header-less CSV, in file order.
_TABLE_COLUMNS = [
    "date", "filepath", "words", "lines", "figs", "cites", "qs",
    "words_in_text", "words_in_headers", "words_in_captions", "n_headers",
    "n_tab_fig", "n_math_inline", "n_math_disp", "end",
]

# Rows are discarded when their extension, file name or full path is listed.
_IGNORED_ENDINGS = [
    "pdf", "txt", "ldf", "apc", "cls", "bst", "png", "sty", "bib", "jpg",
    "latexmkrc",
]
_IGNORED_FILES = ["latexmkrc"]
_IGNORED_FILEPATHS = [
    '2_papers/paper2_AntWalk/reviewed_1.tex',
    '2_papers/paper2_AntWalk/reviewed_2.tex',
]


def _load_tracking_table(csv_path):
    """Read the tracking CSV and return the cleaned, labelled table.

    Adds ``path``, ``file`` and ``ending`` columns derived from ``filepath``,
    removes ignored rows, and adds ``labels`` (from ``label_dict``) and
    ``total_words`` (text + header + caption words).

    Raises:
        KeyError: if a remaining ``filepath`` has no entry in ``label_dict``.
    """
    df = pd.read_csv(csv_path, header=None, names=_TABLE_COLUMNS)
    df["filepath"] = df["filepath"].str.strip()
    df = df.drop(df[df["filepath"] == ".gitignore"].index)
    # A gap-free index is required so the column-wise concat below lines up.
    df = df.reset_index(drop=True)
    df["date"] = pd.to_datetime(df["date"])

    pieces = [os.path.split(f) for f in df["filepath"]]
    pieces = pd.DataFrame(
        [[folder, fname, get_file_ending(fname)] for folder, fname in pieces],
        columns=["path", "file", "ending"],
    )
    df = pd.concat([df, pieces], axis=1)
    # Files without a directory part get a placeholder directory name.
    df.loc[df["path"] == "", "path"] = "mainfiles"

    ignored = (
        df["ending"].isin(_IGNORED_ENDINGS)
        | df["file"].isin(_IGNORED_FILES)
        | df["filepath"].isin(_IGNORED_FILEPATHS)
    )
    df = df.drop(df[ignored].index).reset_index(drop=True)

    # Report every unlabelled file at once instead of failing on the first.
    unlabeled = [p for p in df["filepath"].unique() if p not in label_dict]
    if unlabeled:
        raise KeyError(f"no entry in label_dict for: {unlabeled}")
    df["labels"] = [label_dict[p] for p in df["filepath"]]

    df["total_words"] = (
        df["words_in_text"] + df["words_in_headers"] + df["words_in_captions"]
    )
    return df


def _build_figure(df_total, intro, disc, paper_words, bla_words):
    """Create the overview figure and return it.

    Layout: one full-width panel on top (total words) and, below it, two
    side-by-side groups (introduction, discussion). Each group has a word
    count panel spanning its top row and three panels in its bottom row.
    """
    fig = plt.figure(figsize=(20, 20))
    gs0 = gridspec.GridSpec(
        2, 1, left=0.05, right=1, wspace=0.1, hspace=0.3, height_ratios=[1, 2]
    )
    gs1 = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=gs0[0])
    gs2 = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs0[1])
    gs21 = gridspec.GridSpecFromSubplotSpec(
        2, 3, subplot_spec=gs2[0], hspace=0.4, wspace=0.4
    )
    gs22 = gridspec.GridSpecFromSubplotSpec(
        2, 3, subplot_spec=gs2[1], hspace=0.4, wspace=0.4
    )

    # TOTAL
    plot_n_words_total(fig.add_subplot(gs1[0]), df_total, paper_words, bla_words)

    # INTRO
    ax_intro = fig.add_subplot(gs21[0, 0:])
    ax_intro_figs = fig.add_subplot(gs21[1, 0])
    ax_intro_cites = fig.add_subplot(gs21[1, 1])
    ax_intro_qs = fig.add_subplot(gs21[1, 2])
    plot_chapter_intro(ax_intro, intro, "# Words in Introduction")
    plot_figs(ax_intro_figs, intro, "Introduction")
    plot_cites(ax_intro_cites, intro, "Introduction")
    plot_qs(ax_intro_qs, intro, "Introduction")

    # DISCUSSION
    ax_disc = fig.add_subplot(gs22[0, 0:])
    plot_chapter_intro(ax_disc, disc, "# Words in Discussion")
    ax_disc_figs = fig.add_subplot(gs22[1, 0])
    ax_disc_cites = fig.add_subplot(gs22[1, 1])
    ax_disc_qs = fig.add_subplot(gs22[1, 2])
    plot_figs(ax_disc_figs, disc, "Discussion")
    plot_cites(ax_disc_cites, disc, "Discussion")
    plot_qs(ax_disc_qs, disc, "Discussion")

    # Shared styling is applied last; it also clears every x-axis label.
    for axis in fig.axes:
        set_style(axis)
    return fig


def main(csv_path=_DEFAULT_TABLE):
    """Build the word-count overview figure from the tracking table.

    Parameters:
        csv_path: location of the header-less tracking CSV. Defaults to the
            path the script originally used.

    Writes ``asd2.png``, ``words_in_diss.png`` and ``words_in_diss.pdf``
    using relative file names (i.e. into the current working directory).
    """
    df = _load_tracking_table(csv_path)

    by_label = df.groupby(["date", "labels"])["total_words"].sum().reset_index()
    words_of = {
        lab: by_label.loc[by_label["labels"] == lab, "total_words"].unique()
        for lab in ("paper", "thesis noncontent", "latex")
    }
    # The paper band uses the first distinct per-date total, while the
    # boilerplate band uses the largest total of each contributing label.
    paper_words = words_of["paper"][0]
    bla_words = words_of["thesis noncontent"].max() + words_of["latex"].max()

    df_total = df.groupby(["date"])["total_words"].sum().reset_index()
    intro = df.loc[df["path"] == "1_introduction"]
    disc = df.loc[df["path"] == "3_discussion"]

    fig = _build_figure(df_total, intro, disc, paper_words, bla_words)

    # NOTE(review): "asd2.png" looks like a leftover scratch output; kept so
    # the set of files written is unchanged -- confirm before removing.
    fig.savefig("asd2.png", facecolor='white', transparent=False)
    fig.savefig("words_in_diss.png", facecolor='white', transparent=False)
    fig.savefig("words_in_diss.pdf")
# Script entry point: build the figure only when this file is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment