lschwetlick · January 6, 2023 18:47
diff --git a/makeplot.py b/makeplot.py
 import os
 import pandas as pd
 import time
 import seaborn as sns
 from matplotlib import pyplot as plt
 import matplotlib.gridspec as gridspec

 # Category assigned to each tracked file, keyed by the value found in the
 # CSV's ``filepath`` column.  Categories in use: "thesis content",
 # "thesis noncontent", "paper" and "latex".
 label_dict = {
     "0_preface/abstract.tex": "thesis noncontent",
     "0_preface/abstractGerman.tex": "thesis noncontent",
     "0_preface/acknowledgments.tex": "thesis noncontent",
     "1_introduction/introduction.tex": "thesis content",
     "2_papers/paper1_SceneWalk/Supplement.tex": "paper",
     "2_papers/paper1_SceneWalk/main.tex": "paper",
     "2_papers/paper2_AntWalk/main.tex": "paper",
     "2_papers/paper2_AntWalk/coverpage.tex": "paper",
     "2_papers/paper1_SceneWalk/coverpage.tex": "paper",
     "2_papers/papers.tex": "thesis content",
     "3_discussion/discussion.tex": "thesis content",
     "basic_info.tex": "latex",
     "format/layout.tex": "latex",
     "format/print_info.tex": "latex",
     "format/title_page.tex": "latex",
     "main.tex": "latex",
 }

 # Font sizes used for y-axis labels and subplot titles.
 YLAB = 15
 TITLE = 15

 # Colour palette shared by the plotting helpers (indexed by position).
 COLORS = ["orange", "darkgreen", "seagreen", "teal", "limegreen"]


 def plot_chapter_intro(ax, chap, name):
     """Draw the word count of one chapter over time on ``ax``.

     Plots the ``total_words`` column of ``chap`` against its ``date`` column
     as a black line, titles the subplot with ``name`` and returns ``ax``.
     """
     line_style = {"color": "black", "linewidth": 3}
     sns.lineplot(data=chap, x="date", y="total_words", ax=ax, **line_style)
     ax.set_ylabel("# Words", fontsize=YLAB)
     ax.set_title(f"{name}", fontsize=TITLE)
     return ax

 def plot_figs(ax, intro, lab):
     """Draw the ``figs`` column of ``intro`` over ``date`` on ``ax``.

     ``lab`` is the chapter name inserted into the subplot title.  Returns
     the axes that was passed in.
     """
     line_style = {"color": COLORS[1], "linewidth": 4}
     sns.lineplot(data=intro, x="date", y="figs", ax=ax, **line_style)
     ax.set_ylabel("# Figures", fontsize=YLAB)
     ax.set_title(f"# Figures in {lab}", fontsize=TITLE)
     return ax
  
 def plot_cites(ax, intro, lab):
     """Draw the ``cites`` column of ``intro`` over ``date`` on ``ax``.

     ``lab`` is the chapter name inserted into the subplot title.  Returns
     the axes that was passed in.
     """
     line_style = {"color": COLORS[2], "linewidth": 4}
     sns.lineplot(data=intro, x="date", y="cites", ax=ax, **line_style)
     ax.set_ylabel("# Citations", fontsize=YLAB)
     ax.set_title(f"# Citations in {lab}", fontsize=TITLE)
     return ax
  
 def plot_qs(ax, intro, lab):
     """Draw the ``qs`` column of ``intro`` over ``date`` on ``ax``.

     ``lab`` is the chapter name inserted into the subplot title.  Returns
     the axes that was passed in.
     """
     line_style = {"color": COLORS[3], "linewidth": 4}
     sns.lineplot(data=intro, x="date", y="qs", ax=ax, **line_style)
     ax.set_ylabel("# '?'", fontsize=YLAB)
     ax.set_title(f"# '?' in {lab}", fontsize=TITLE)
     return ax


 def set_style(ax):
     """Apply the shared subplot styling to ``ax`` and return it.

     Hides the top and right spines, rotates the x tick labels by 45 degrees
     and clears the x-axis label.
     """
     for side in ("top", "right"):
         ax.spines[side].set_visible(False)
     ax.tick_params(axis="x", labelrotation=45)
     ax.set_xlabel("")
     return ax


 def plot_n_words_total(ax, df_total, paper_words, bla_words):
     """Draw the overall word-count panel on ``ax`` and return it.

     The black line is the ``total_words`` column of ``df_total`` over its
     ``date`` column.  Two shaded horizontal bands are stacked from zero:
     the first is ``paper_words`` high, the second ``bla_words`` high.
     """
     sns.lineplot(data=df_total, x="date", y="total_words", ax=ax, color="black", linewidth=3)

     # Background bands as (lower edge, upper edge, colour).
     bands = (
         (0, paper_words, COLORS[0]),
         (paper_words, paper_words + bla_words, COLORS[1]),
     )
     for lower, upper, shade in bands:
         ax.axhspan(lower, upper, facecolor=shade, alpha=0.1)

     ax.set_title("Words in my PhD Dissertation", fontsize=20)
     ax.set_xlabel("Date")
     ax.set_ylabel("# Words")
     # Only the lower y limit is passed.
     ax.set_ylim(20000)

     # Captions as (text, vertical position in axes fraction, colour).
     captions = (
         ("Words in the papers", 0.2, COLORS[0]),
         ("boilerplate", 0.45, COLORS[1]),
         ("Thesis Text", 0.7, "black"),
     )
     for text, height, shade in captions:
         ax.annotate(text, [0.05, height], color=shade, xycoords='axes fraction', fontsize=15)

     return ax


 def get_file_ending(s):
     """Return the ending of file name ``s`` without the dot, or ``None``.

     The ending is the text after the LAST dot, so ``"figure.v2.pdf"`` gives
     ``"pdf"`` (the previous implementation returned the segment after the
     first dot, ``"v2"``).

     Returns ``None`` when ``s`` contains no dot, or when it starts with a
     dot (names such as ``".gitignore"`` are treated as having no ending).
     A trailing dot yields the empty string.
     """
     _, dot, ending = s.rpartition(".")
     if not dot or s.startswith("."):
         return None
     return ending

 def main(csv_path="/Users/lisa/Documents/SFB1294_B05/Dissertation_tracking/XXX/table.csv"):
     """Build the dissertation progress figure from the tracking table.

     Reads the header-less CSV, removes auxiliary files, labels the
     remaining files via ``label_dict`` and plots the total word count plus
     per-chapter panels (words, figures, citations, '?') for the
     introduction and the discussion.  The figure is saved as ``asd2.png``,
     ``words_in_diss.png`` and ``words_in_diss.pdf`` using relative paths.

     Args:
         csv_path: Location of the tracking table.  Defaults to the path
             that used to be hard-coded, so ``main()`` behaves as before.

     Raises:
         KeyError: If a file that survives filtering has no entry in
             ``label_dict``; the message lists the offending paths.
     """
     column_names = [
         "date", "filepath", "words", "lines", "figs", "cites", "qs",
         "words_in_text", "words_in_headers", "words_in_captions",
         "n_headers", "n_tab_fig", "n_math_inline", "n_math_disp", "end",
     ]
     df = pd.read_csv(csv_path, header=None, names=column_names)

     df["filepath"] = df["filepath"].str.strip()
     df = df.loc[df["filepath"] != ".gitignore"].reset_index(drop=True)
     df["date"] = pd.to_datetime(df["date"])

     # Split every file path into directory, file name and file ending.
     parts = [os.path.split(p) for p in df["filepath"]]
     df["path"] = [directory for directory, _ in parts]
     df["file"] = [filename for _, filename in parts]
     df["ending"] = [get_file_ending(filename) for _, filename in parts]
     # Files without a directory component get a placeholder directory name.
     df.loc[df["path"] == "", "path"] = "mainfiles"

     # Drop everything that is not thesis text in a single pass.
     excluded_endings = [
         "pdf", "txt", "ldf", "apc", "cls", "bst", "png", "sty", "bib",
         "jpg", "latexmkrc",
     ]
     excluded_filepaths = [
         '2_papers/paper2_AntWalk/reviewed_1.tex',
         '2_papers/paper2_AntWalk/reviewed_2.tex',
     ]
     excluded = (
         df["ending"].isin(excluded_endings)
         | (df["file"] == "latexmkrc")
         | df["filepath"].isin(excluded_filepaths)
     )
     df = df.loc[~excluded].reset_index(drop=True)

     # Fail with the complete list of unlabelled files instead of a bare
     # KeyError on the first one.
     unlabelled = sorted(set(df["filepath"]) - set(label_dict))
     if unlabelled:
         raise KeyError(f"no entry in label_dict for: {unlabelled}")
     df["labels"] = df["filepath"].map(label_dict)

     df["total_words"] = df["words_in_text"] + df["words_in_headers"] + df["words_in_captions"]

     df_by_lab = df.groupby(["date", "labels"]).total_words.sum().reset_index()

     def distinct_totals(label):
         # Distinct per-date word totals of one label, in order of appearance.
         return df_by_lab.loc[df_by_lab["labels"] == label, "total_words"].unique()

     # The first distinct total is used for the papers; the maximum is used
     # for the two boilerplate categories.
     paper_words = distinct_totals("paper")[0]
     bla_words = distinct_totals("thesis noncontent").max() + distinct_totals("latex").max()

     df_total = df.groupby(["date"]).total_words.sum().reset_index()

     # Layout: one wide panel on top, two chapter blocks (2 x 3 each) below.
     fig = plt.figure(figsize=(20, 20))
     outer = gridspec.GridSpec(2, 1, left=0.05, right=1, wspace=0.1, hspace=0.3, height_ratios=[1, 2])
     top = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=outer[0])
     bottom = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=outer[1])

     plot_n_words_total(fig.add_subplot(top[0]), df_total, paper_words, bla_words)

     # (directory in the "path" column, chapter name used in titles)
     chapters = (("1_introduction", "Introduction"), ("3_discussion", "Discussion"))
     for column, (directory, title) in enumerate(chapters):
         chapter = df.loc[df["path"] == directory]
         grid = gridspec.GridSpecFromSubplotSpec(2, 3, subplot_spec=bottom[column], hspace=0.4, wspace=0.4)
         plot_chapter_intro(fig.add_subplot(grid[0, 0:]), chapter, f"# Words in {title}")
         plot_figs(fig.add_subplot(grid[1, 0]), chapter, title)
         plot_cites(fig.add_subplot(grid[1, 1]), chapter, title)
         plot_qs(fig.add_subplot(grid[1, 2]), chapter, title)

     for axis in fig.axes:
         set_style(axis)

     # NOTE(review): "asd2.png" looks like a scratch output; it is still
     # written so existing behaviour is unchanged -- confirm it is needed.
     fig.savefig("asd2.png", facecolor='white', transparent=False)
     fig.savefig("words_in_diss.png", facecolor='white', transparent=False)
     fig.savefig("words_in_diss.pdf")

 # Run the plotting pipeline only when this file is executed as a script.
 if __name__ == "__main__":
    main()
	import os
	import pandas as pd
	import time
	import seaborn as sns
	from matplotlib import pyplot as plt
	import matplotlib.gridspec as gridspec

	# Category assigned to each tracked file, keyed by the value found in the
	# CSV's ``filepath`` column.  Categories in use: "thesis content",
	# "thesis noncontent", "paper" and "latex".
	label_dict = {
	    "0_preface/abstract.tex": "thesis noncontent",
	    "0_preface/abstractGerman.tex": "thesis noncontent",
	    "0_preface/acknowledgments.tex": "thesis noncontent",
	    "1_introduction/introduction.tex": "thesis content",
	    "2_papers/paper1_SceneWalk/Supplement.tex": "paper",
	    "2_papers/paper1_SceneWalk/main.tex": "paper",
	    "2_papers/paper2_AntWalk/main.tex": "paper",
	    "2_papers/paper2_AntWalk/coverpage.tex": "paper",
	    "2_papers/paper1_SceneWalk/coverpage.tex": "paper",
	    "2_papers/papers.tex": "thesis content",
	    "3_discussion/discussion.tex": "thesis content",
	    "basic_info.tex": "latex",
	    "format/layout.tex": "latex",
	    "format/print_info.tex": "latex",
	    "format/title_page.tex": "latex",
	    "main.tex": "latex",
	}

	# Font sizes used for y-axis labels and subplot titles.
	YLAB = 15
	TITLE = 15

	# Colour palette shared by the plotting helpers (indexed by position).
	COLORS = ["orange", "darkgreen", "seagreen", "teal", "limegreen"]


	def plot_chapter_intro(ax, chap, name):
	    """Draw the word count of one chapter over time on ``ax``.

	    Plots the ``total_words`` column of ``chap`` against its ``date`` column
	    as a black line, titles the subplot with ``name`` and returns ``ax``.
	    """
	    line_style = {"color": "black", "linewidth": 3}
	    sns.lineplot(data=chap, x="date", y="total_words", ax=ax, **line_style)
	    ax.set_ylabel("# Words", fontsize=YLAB)
	    ax.set_title(f"{name}", fontsize=TITLE)
	    return ax

	def plot_figs(ax, intro, lab):
	    """Draw the ``figs`` column of ``intro`` over ``date`` on ``ax``.

	    ``lab`` is the chapter name inserted into the subplot title.  Returns
	    the axes that was passed in.
	    """
	    line_style = {"color": COLORS[1], "linewidth": 4}
	    sns.lineplot(data=intro, x="date", y="figs", ax=ax, **line_style)
	    ax.set_ylabel("# Figures", fontsize=YLAB)
	    ax.set_title(f"# Figures in {lab}", fontsize=TITLE)
	    return ax

	def plot_cites(ax, intro, lab):
	    """Draw the ``cites`` column of ``intro`` over ``date`` on ``ax``.

	    ``lab`` is the chapter name inserted into the subplot title.  Returns
	    the axes that was passed in.
	    """
	    line_style = {"color": COLORS[2], "linewidth": 4}
	    sns.lineplot(data=intro, x="date", y="cites", ax=ax, **line_style)
	    ax.set_ylabel("# Citations", fontsize=YLAB)
	    ax.set_title(f"# Citations in {lab}", fontsize=TITLE)
	    return ax

	def plot_qs(ax, intro, lab):
	    """Draw the ``qs`` column of ``intro`` over ``date`` on ``ax``.

	    ``lab`` is the chapter name inserted into the subplot title.  Returns
	    the axes that was passed in.
	    """
	    line_style = {"color": COLORS[3], "linewidth": 4}
	    sns.lineplot(data=intro, x="date", y="qs", ax=ax, **line_style)
	    ax.set_ylabel("# '?'", fontsize=YLAB)
	    ax.set_title(f"# '?' in {lab}", fontsize=TITLE)
	    return ax


	def set_style(ax):
	    """Apply the shared subplot styling to ``ax`` and return it.

	    Hides the top and right spines, rotates the x tick labels by 45 degrees
	    and clears the x-axis label.
	    """
	    for side in ("top", "right"):
	        ax.spines[side].set_visible(False)
	    ax.tick_params(axis="x", labelrotation=45)
	    ax.set_xlabel("")
	    return ax


	def plot_n_words_total(ax, df_total, paper_words, bla_words):
	    """Draw the overall word-count panel on ``ax`` and return it.

	    The black line is the ``total_words`` column of ``df_total`` over its
	    ``date`` column.  Two shaded horizontal bands are stacked from zero:
	    the first is ``paper_words`` high, the second ``bla_words`` high.
	    """
	    sns.lineplot(data=df_total, x="date", y="total_words", ax=ax, color="black", linewidth=3)

	    # Background bands as (lower edge, upper edge, colour).
	    bands = (
	        (0, paper_words, COLORS[0]),
	        (paper_words, paper_words + bla_words, COLORS[1]),
	    )
	    for lower, upper, shade in bands:
	        ax.axhspan(lower, upper, facecolor=shade, alpha=0.1)

	    ax.set_title("Words in my PhD Dissertation", fontsize=20)
	    ax.set_xlabel("Date")
	    ax.set_ylabel("# Words")
	    # Only the lower y limit is passed.
	    ax.set_ylim(20000)

	    # Captions as (text, vertical position in axes fraction, colour).
	    captions = (
	        ("Words in the papers", 0.2, COLORS[0]),
	        ("boilerplate", 0.45, COLORS[1]),
	        ("Thesis Text", 0.7, "black"),
	    )
	    for text, height, shade in captions:
	        ax.annotate(text, [0.05, height], color=shade, xycoords='axes fraction', fontsize=15)

	    return ax


	def get_file_ending(s):
	    """Return the ending of file name ``s`` without the dot, or ``None``.

	    The ending is the text after the LAST dot, so ``"figure.v2.pdf"`` gives
	    ``"pdf"`` (the previous implementation returned the segment after the
	    first dot, ``"v2"``).

	    Returns ``None`` when ``s`` contains no dot, or when it starts with a
	    dot (names such as ``".gitignore"`` are treated as having no ending).
	    A trailing dot yields the empty string.
	    """
	    _, dot, ending = s.rpartition(".")
	    if not dot or s.startswith("."):
	        return None
	    return ending

	def main(csv_path="/Users/lisa/Documents/SFB1294_B05/Dissertation_tracking/XXX/table.csv"):
	    """Build the dissertation progress figure from the tracking table.

	    Reads the header-less CSV, removes auxiliary files, labels the
	    remaining files via ``label_dict`` and plots the total word count plus
	    per-chapter panels (words, figures, citations, '?') for the
	    introduction and the discussion.  The figure is saved as ``asd2.png``,
	    ``words_in_diss.png`` and ``words_in_diss.pdf`` using relative paths.

	    Args:
	        csv_path: Location of the tracking table.  Defaults to the path
	            that used to be hard-coded, so ``main()`` behaves as before.

	    Raises:
	        KeyError: If a file that survives filtering has no entry in
	            ``label_dict``; the message lists the offending paths.
	    """
	    column_names = [
	        "date", "filepath", "words", "lines", "figs", "cites", "qs",
	        "words_in_text", "words_in_headers", "words_in_captions",
	        "n_headers", "n_tab_fig", "n_math_inline", "n_math_disp", "end",
	    ]
	    df = pd.read_csv(csv_path, header=None, names=column_names)

	    df["filepath"] = df["filepath"].str.strip()
	    df = df.loc[df["filepath"] != ".gitignore"].reset_index(drop=True)
	    df["date"] = pd.to_datetime(df["date"])

	    # Split every file path into directory, file name and file ending.
	    parts = [os.path.split(p) for p in df["filepath"]]
	    df["path"] = [directory for directory, _ in parts]
	    df["file"] = [filename for _, filename in parts]
	    df["ending"] = [get_file_ending(filename) for _, filename in parts]
	    # Files without a directory component get a placeholder directory name.
	    df.loc[df["path"] == "", "path"] = "mainfiles"

	    # Drop everything that is not thesis text in a single pass.
	    excluded_endings = [
	        "pdf", "txt", "ldf", "apc", "cls", "bst", "png", "sty", "bib",
	        "jpg", "latexmkrc",
	    ]
	    excluded_filepaths = [
	        '2_papers/paper2_AntWalk/reviewed_1.tex',
	        '2_papers/paper2_AntWalk/reviewed_2.tex',
	    ]
	    excluded = (
	        df["ending"].isin(excluded_endings)
	        | (df["file"] == "latexmkrc")
	        | df["filepath"].isin(excluded_filepaths)
	    )
	    df = df.loc[~excluded].reset_index(drop=True)

	    # Fail with the complete list of unlabelled files instead of a bare
	    # KeyError on the first one.
	    unlabelled = sorted(set(df["filepath"]) - set(label_dict))
	    if unlabelled:
	        raise KeyError(f"no entry in label_dict for: {unlabelled}")
	    df["labels"] = df["filepath"].map(label_dict)

	    df["total_words"] = df["words_in_text"] + df["words_in_headers"] + df["words_in_captions"]

	    df_by_lab = df.groupby(["date", "labels"]).total_words.sum().reset_index()

	    def distinct_totals(label):
	        # Distinct per-date word totals of one label, in order of appearance.
	        return df_by_lab.loc[df_by_lab["labels"] == label, "total_words"].unique()

	    # The first distinct total is used for the papers; the maximum is used
	    # for the two boilerplate categories.
	    paper_words = distinct_totals("paper")[0]
	    bla_words = distinct_totals("thesis noncontent").max() + distinct_totals("latex").max()

	    df_total = df.groupby(["date"]).total_words.sum().reset_index()

	    # Layout: one wide panel on top, two chapter blocks (2 x 3 each) below.
	    fig = plt.figure(figsize=(20, 20))
	    outer = gridspec.GridSpec(2, 1, left=0.05, right=1, wspace=0.1, hspace=0.3, height_ratios=[1, 2])
	    top = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=outer[0])
	    bottom = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=outer[1])

	    plot_n_words_total(fig.add_subplot(top[0]), df_total, paper_words, bla_words)

	    # (directory in the "path" column, chapter name used in titles)
	    chapters = (("1_introduction", "Introduction"), ("3_discussion", "Discussion"))
	    for column, (directory, title) in enumerate(chapters):
	        chapter = df.loc[df["path"] == directory]
	        grid = gridspec.GridSpecFromSubplotSpec(2, 3, subplot_spec=bottom[column], hspace=0.4, wspace=0.4)
	        plot_chapter_intro(fig.add_subplot(grid[0, 0:]), chapter, f"# Words in {title}")
	        plot_figs(fig.add_subplot(grid[1, 0]), chapter, title)
	        plot_cites(fig.add_subplot(grid[1, 1]), chapter, title)
	        plot_qs(fig.add_subplot(grid[1, 2]), chapter, title)

	    for axis in fig.axes:
	        set_style(axis)

	    # NOTE(review): "asd2.png" looks like a scratch output; it is still
	    # written so existing behaviour is unchanged -- confirm it is needed.
	    fig.savefig("asd2.png", facecolor='white', transparent=False)
	    fig.savefig("words_in_diss.png", facecolor='white', transparent=False)
	    fig.savefig("words_in_diss.pdf")

	# Run the plotting pipeline only when this file is executed as a script.
	if __name__ == "__main__":
	    main()