Last active
January 6, 2023 18:47
-
-
Save lschwetlick/d8d334d18044986eb7fb56d4b67d7f44 to your computer and use it in GitHub Desktop.
How to procrastinate writing your thesis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import time | |
import seaborn as sns | |
from matplotlib import pyplot as plt | |
import matplotlib.gridspec as gridspec | |
# Category assigned to every tracked file of the thesis repository, keyed by
# the file path as it appears in the tracking table. The categories are used
# to aggregate word counts per kind of text.
label_dict = {
    "0_preface/abstract.tex": "thesis noncontent",
    "0_preface/abstractGerman.tex": "thesis noncontent",
    "0_preface/acknowledgments.tex": "thesis noncontent",
    "1_introduction/introduction.tex": "thesis content",
    "2_papers/paper1_SceneWalk/Supplement.tex": "paper",
    "2_papers/paper1_SceneWalk/main.tex": "paper",
    "2_papers/paper2_AntWalk/main.tex": "paper",
    "2_papers/paper2_AntWalk/coverpage.tex": "paper",
    "2_papers/paper1_SceneWalk/coverpage.tex": "paper",
    "2_papers/papers.tex": "thesis content",
    "3_discussion/discussion.tex": "thesis content",
    "basic_info.tex": "latex",
    "format/layout.tex": "latex",
    "format/print_info.tex": "latex",
    "format/title_page.tex": "latex",
    "main.tex": "latex",
}
# Font size used for the y-axis labels of the small panels.
YLAB = 15
# Font size used for the titles of the small panels.
TITLE = 15
# Shared palette; the plotting helpers pick their colour by index.
COLORS = [
    "orange",
    "darkgreen",
    "seagreen",
    "teal",
    "limegreen",
]
def plot_chapter_intro(ax, chap, name):
    """Draw the word count of a single chapter over time.

    Args:
        ax: Axes object the line is drawn on.
        chap: Table handed to seaborn; it is read through its ``date`` and
            ``total_words`` columns.
        name: Text shown as the panel title.

    Returns:
        The ``ax`` that was passed in.
    """
    line_style = {"color": "black", "linewidth": 3}
    sns.lineplot(data=chap, x="date", y="total_words", ax=ax, **line_style)
    ax.set_ylabel("# Words", fontsize=YLAB)
    ax.set_title(f"{name}", fontsize=TITLE)
    return ax
def plot_figs(ax, intro, lab):
    """Draw the number of figures of one chapter over time.

    Args:
        ax: Axes object the line is drawn on.
        intro: Table handed to seaborn; it is read through its ``date`` and
            ``figs`` columns.
        lab: Chapter name inserted into the panel title.

    Returns:
        The ``ax`` that was passed in.
    """
    line_color = COLORS[1]
    sns.lineplot(
        data=intro,
        x="date",
        y="figs",
        ax=ax,
        color=line_color,
        linewidth=4,
    )
    ax.set_ylabel("# Figures", fontsize=YLAB)
    ax.set_title(f"# Figures in {lab}", fontsize=TITLE)
    return ax
def plot_cites(ax, intro, lab):
    """Draw the number of citations of one chapter over time.

    Args:
        ax: Axes object the line is drawn on.
        intro: Table handed to seaborn; it is read through its ``date`` and
            ``cites`` columns.
        lab: Chapter name inserted into the panel title.

    Returns:
        The ``ax`` that was passed in.
    """
    line_color = COLORS[2]
    sns.lineplot(
        data=intro,
        x="date",
        y="cites",
        ax=ax,
        color=line_color,
        linewidth=4,
    )
    ax.set_ylabel("# Citations", fontsize=YLAB)
    ax.set_title(f"# Citations in {lab}", fontsize=TITLE)
    return ax
def plot_qs(ax, intro, lab):
    """Draw the number of '?' counted in one chapter over time.

    Args:
        ax: Axes object the line is drawn on.
        intro: Table handed to seaborn; it is read through its ``date`` and
            ``qs`` columns.
        lab: Chapter name inserted into the panel title.

    Returns:
        The ``ax`` that was passed in.
    """
    line_color = COLORS[3]
    sns.lineplot(
        data=intro,
        x="date",
        y="qs",
        ax=ax,
        color=line_color,
        linewidth=4,
    )
    ax.set_ylabel("# '?'", fontsize=YLAB)
    ax.set_title(f"# '?' in {lab}", fontsize=TITLE)
    return ax
def set_style(ax):
    """Apply the shared look to one panel.

    Hides the top and right spines, rotates the x tick labels by 45 degrees
    and clears the x-axis label.

    Args:
        ax: Axes object to restyle in place.

    Returns:
        The ``ax`` that was passed in.
    """
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.set_xlabel("")
    ax.tick_params(axis="x", labelrotation=45)
    return ax
def plot_n_words_total(ax, df_total, paper_words, bla_words, y_min=20000):
    """Draw the total word count over time with shaded reference bands.

    Two horizontal bands are shaded behind the line: one from 0 up to
    ``paper_words`` and one stacked on top of it with height ``bla_words``.
    Each band and the line get a text annotation placed in axes-fraction
    coordinates.

    Args:
        ax: Axes object to draw on.
        df_total: Table handed to seaborn; it is read through its ``date``
            and ``total_words`` columns.
        paper_words: Upper edge of the first (papers) band.
        bla_words: Height of the second (boilerplate) band.
        y_min: Lower limit of the y-axis. Defaults to 20000, the value that
            used to be hard-coded.

    Returns:
        The ``ax`` that was passed in.
    """
    sns.lineplot(
        data=df_total, x="date", y="total_words", ax=ax, color="black", linewidth=3
    )

    # Shaded bands: papers first, boilerplate stacked directly above.
    ax.axhspan(0, paper_words, facecolor=COLORS[0], alpha=0.1)
    ax.axhspan(paper_words, paper_words + bla_words, facecolor=COLORS[1], alpha=0.1)

    ax.set_title("Words in my PhD Dissertation", fontsize=20)
    ax.set_xlabel("Date")
    ax.set_ylabel("# Words")

    # Only the lower limit is fixed; the upper limit is left to matplotlib.
    ax.set_ylim(y_min)

    ax.annotate("Words in the papers", [0.05, 0.2], color=COLORS[0],
                xycoords='axes fraction', fontsize=15)
    ax.annotate("boilerplate", [0.05, 0.45], color=COLORS[1],
                xycoords='axes fraction', fontsize=15)
    ax.annotate("Thesis Text", [0.05, 0.7], color="black",
                xycoords='axes fraction', fontsize=15)
    return ax
def get_file_ending(s):
    """Return the extension of file name ``s`` without the leading dot.

    The extension is the text after the *last* dot, so ``"a.tar.gz"`` yields
    ``"gz"``.

    Args:
        s: File name (no directory part expected).

    Returns:
        The extension as a string, or ``None`` when the name contains no dot
        or starts with a dot (hidden files such as ``.gitignore``).
    """
    _, dot, ending = s.rpartition(".")
    # No dot at all, or a dot-leading name: treat as "no extension".
    if not dot or s.startswith("."):
        return None
    return ending
def main(csv_path="/Users/lisa/Documents/SFB1294_B05/Dissertation_tracking/XXX/table.csv"):
    """Read the tracking table and render the dissertation progress figure.

    The table is loaded without a header row, non-text files are filtered
    out, every remaining file is labelled through ``label_dict``, and a
    figure with one overview panel plus four panels each for the
    introduction and the discussion is written to ``asd2.png``,
    ``words_in_diss.png`` and ``words_in_diss.pdf`` in the working directory.

    Args:
        csv_path: Location of the header-less CSV tracking table. Defaults to
            the path that used to be hard-coded.

    Raises:
        KeyError: If a file that survives filtering has no entry in
            ``label_dict``.
    """
    column_names = [
        "date", "filepath", "words", "lines", "figs", "cites", "qs",
        "words_in_text", "words_in_headers", "words_in_captions",
        "n_headers", "n_tab_fig", "n_math_inline", "n_math_disp", "end",
    ]
    df = pd.read_csv(csv_path, header=None, names=column_names)
    df["filepath"] = df["filepath"].str.strip()
    df = df.drop(df[df.filepath == ".gitignore"].index)
    df = df.reset_index(drop=True)
    df["date"] = pd.to_datetime(df["date"])

    # Split every file path into directory, file name and extension.
    parts = [os.path.split(f) for f in df["filepath"]]
    parts = [[folder, fname, get_file_ending(fname)] for folder, fname in parts]
    df = pd.concat(
        [df, pd.DataFrame(parts, columns=["path", "file", "ending"])], axis=1
    )
    # Files in the repository root have an empty directory part.
    df.loc[df["path"] == "", "path"] = "mainfiles"

    # Drop everything that is not thesis text in a single pass.
    skipped_endings = [
        "pdf", "txt", "ldf", "apc", "cls", "bst", "png", "sty", "bib", "jpg",
        "latexmkrc",
    ]
    skipped_files = [
        '2_papers/paper2_AntWalk/reviewed_1.tex',
        '2_papers/paper2_AntWalk/reviewed_2.tex',
    ]
    unwanted = (
        df["ending"].isin(skipped_endings)
        | (df["file"] == "latexmkrc")
        | df["filepath"].isin(skipped_files)
    )
    df = df.loc[~unwanted].reset_index(drop=True)

    # Fail with the full list of unlabelled files instead of only the first.
    unlabelled = sorted(set(df["filepath"]) - set(label_dict))
    if unlabelled:
        raise KeyError(f"no entry in label_dict for: {unlabelled}")
    df["labels"] = [label_dict[p] for p in df["filepath"]]

    df["total_words"] = (
        df["words_in_text"] + df["words_in_headers"] + df["words_in_captions"]
    )
    df_by_lab = df.groupby(["date", "labels"]).total_words.sum().reset_index()
    # NOTE(review): papers use the first distinct per-date total while the
    # boilerplate below uses the maximum -- confirm this is intended.
    paper_words = df_by_lab.loc[df_by_lab.labels == "paper"].total_words.unique()[0]
    bla_words = (
        df_by_lab.loc[df_by_lab.labels == "thesis noncontent"].total_words.unique().max()
        + df_by_lab.loc[df_by_lab.labels == "latex"].total_words.unique().max()
    )
    df_total = df.groupby(["date"]).total_words.sum().reset_index()
    intro = df.loc[df["path"] == "1_introduction"]
    disc = df.loc[df["path"] == "3_discussion"]

    # Layout: overview on top, introduction (left) and discussion (right)
    # below, each as a 2x3 grid whose first row is one wide panel.
    fig = plt.figure(figsize=(20, 20))
    gs0 = gridspec.GridSpec(
        2, 1, left=0.05, right=1, wspace=0.1, hspace=0.3, height_ratios=[1, 2]
    )
    gs1 = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=gs0[0])
    gs2 = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs0[1])
    gs21 = gridspec.GridSpecFromSubplotSpec(
        2, 3, subplot_spec=gs2[0], hspace=0.4, wspace=0.4
    )
    gs22 = gridspec.GridSpecFromSubplotSpec(
        2, 3, subplot_spec=gs2[1], hspace=0.4, wspace=0.4
    )

    # TOTAL
    ax1 = fig.add_subplot(gs1[0])
    plot_n_words_total(ax1, df_total, paper_words, bla_words)

    # INTRO
    ax_intro = fig.add_subplot(gs21[0, 0:])
    ax3 = fig.add_subplot(gs21[1, 0])
    ax4 = fig.add_subplot(gs21[1, 1])
    ax5 = fig.add_subplot(gs21[1, 2])
    plot_chapter_intro(ax_intro, intro, "# Words in Introduction")
    plot_figs(ax3, intro, "Introduction")
    plot_cites(ax4, intro, "Introduction")
    plot_qs(ax5, intro, "Introduction")

    # DISCUSSION
    ax_disc = fig.add_subplot(gs22[0, 0:])
    plot_chapter_intro(ax_disc, disc, "# Words in Discussion")
    ax22 = fig.add_subplot(gs22[1, 0])
    ax23 = fig.add_subplot(gs22[1, 1])
    ax24 = fig.add_subplot(gs22[1, 2])
    plot_figs(ax22, disc, "Discussion")
    plot_cites(ax23, disc, "Discussion")
    plot_qs(ax24, disc, "Discussion")

    # Apply the shared look to every panel of the figure.
    for panel in fig.axes:
        set_style(panel)

    # NOTE(review): "asd2.png" looks like a scratch output -- confirm it is
    # still wanted before removing it.
    fig.savefig("asd2.png", facecolor='white', transparent=False)
    fig.savefig("words_in_diss.png", facecolor='white', transparent=False)
    fig.savefig("words_in_diss.pdf")
# Build the figure only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment