Created
October 4, 2016 22:58
-
-
Save conchoecia/d7f1d5c811965172d03c452ad7ef8321 to your computer and use it in GitHub Desktop.
Parses out a lab notebook in md format into subproject files for easy tracking.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# script: make_notebooks.py
# author: darrin t schultz
# date : 20161004
# make_notebooks.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# make_notebooks.py is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with make_notebooks.py. If not, see <http://www.gnu.org/licenses/>.
# This script will compile a project notebook for each project based
# on a list of input files.
""" | |
This script parses out a lab notebook in md format with date headings and
subprojects. This is targeted at github-flavored markdown.
- The lab notebook must be in the following format, where the line
"## Project List:" is followed by "### projectname" for every project that your
notebook contains.
- You must have a file called 'notebooklist.txt' that has either the full paths
or filenames for every notebook markdown file. For example, for the example
notebook we have called 'exampleNotebook.md', the 'notebooklist.txt' file
looks like this:
```
exampleNotebook.md
```
- Here is 'exampleNotebook.md'
```
# filename: exampleNotebook.md (doesn't matter what is here)
## Project List:
### projectA
### projectB
## (whatever you want here) 20160915
### projectA
- Today I worked on ProjectA
- I made a lot of progress
- It was great
### projectB
- I didn't do anything today. Oh well.
## (whatever you want here) 20160920
- This text should be ignored
### projectB
- I took 4 days off of work and didn't get anything done.
- All of my cultures dried up.
```
- After running this script in the directory where you would like the compiled
notebooks, you will have a `compiled_notebooks/projectA.md` and
`compiled_notebooks/projectB.md` file.
************ How this script works *******************
1. Get a list of all the tracked files in the current git repo.
2. Get a list of all the files to look at to compile the notebook.
3. Get a list of all the project names from all the notebooks. Keyword in
notebooks is: "## Project List:"
4. Make an object for a project
5. Loop through all the dates and look for a "###" with a project name
- Take the project name and add that date and entry to the dictionary for
that project's object
6. When adding an entry to each dict, check for media references and add those
to git if they aren't tracked.
7. For each project, if a referenced file isn't tracked by the git project,
add it.
important names:
<tracked> is all of the files that are tracked in the git repo currently
<notebooks> is all the notebook files to compile from
"""
# Standard-library modules used by the notebook compiler below.
import os
import sys
import subprocess
import datetime
def get_project_list(filename):
    """Return the project names declared in a notebook's project list.

    The list starts on the line after the one that reads exactly
    "## Project List:" and runs until the next line whose first token is
    "##", or until the end of the file.  Every non-blank line in between
    contributes its second whitespace-separated token, so "### projectA"
    yields "projectA".

    Args:
        filename: path of the markdown notebook to scan.

    Returns:
        A list of project names in file order.  The list is empty when the
        notebook has no "## Project List:" line; it is never None, so the
        result can always be concatenated onto another list.
    """
    projectNames = []
    collecting = False
    with open(filename, "r") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            if not collecting:
                collecting = stripped == "## Project List:"
                continue
            tokens = stripped.split()
            if tokens[0] == "##":
                # The next second-level heading closes the project list.
                break
            if len(tokens) > 1:
                # A marker with no name after it does not declare a project.
                projectNames.append(tokens[1])
    return projectNames
class notebook:
    """Collect the dated entries that belong to one project.

    Attributes:
        name: the project name handed to the constructor.
        entries: dict mapping an entry date to the entry stored for it.
        date: today's date as a YYYYMMDD string, taken at construction.
    """

    def __init__(self, name):
        """Create an empty notebook for the project called *name*."""
        self.name = name
        self.entries = {}
        # NOTE: from here on the instance attribute `date` (a string) shadows
        # the date() method; kept so existing readers of `.date` still work.
        self.date = self.date()

    def date(self):
        """Return today's date formatted as YYYYMMDD."""
        return datetime.date.today().strftime('%Y%m%d')

    def add_entry(self, date, entry):
        """Store *entry* under *date*.

        When the date already holds an entry, the new text is appended to it
        instead of replacing it, so a project that shows up more than once
        for the same date keeps all of its text.
        """
        if date in self.entries:
            self.entries[date] += entry
        else:
            self.entries[date] = entry

    def get_entries(self):
        """Return the dict of entries (the live object, not a copy)."""
        return self.entries

    def print_entries(self):
        """Print the dict of entries to stdout."""
        print(self.entries)
# First tokens that mark a markdown heading at the levels this script reacts to.
_HEADING_MARKERS = ("#", "##", "###")


def _list_tracked_files():
    """Return the paths git tracks at HEAD, or [] when git cannot tell."""
    try:
        result = subprocess.run(
            ["git", "ls-tree", "--full-tree", "-r", "--name-only", "HEAD"],
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    except OSError:
        # git is not installed or cannot be executed
        return []
    if result.returncode != 0:
        # e.g. not inside a repository, or the repository has no commit yet
        return []
    return [path for path in result.stdout.decode("utf-8").split("\n") if path]


def _read_notebook_list(listfile):
    """Return the non-blank lines of *listfile* with whitespace stripped."""
    with open(listfile, "r") as f:
        return [line.strip() for line in f if line.strip()]


def _store_section(projectObjects, project, date, lines):
    """Add the collected *lines* to *project* unless they are blank."""
    entry = "".join(lines)
    if project and entry.strip():
        projectObjects[project].add_entry(date, entry)


def _collect_entries(filename, projectObjects):
    """Feed every project section found in *filename* to its notebook object.

    A "## ... YYYYMMDD" heading sets the current date.  A "### name" heading
    whose last token is a key of *projectObjects* opens a section for that
    project.  Any other "#", "##" or "###" heading closes the running section
    without opening a new one, so the text below it is ignored.
    """
    started = False
    date = ""
    currentProject = ""
    sectionLines = []
    inFence = False
    with open(filename, "r") as f:
        for line in f:
            stripped = line.strip()
            # A line that starts with a code fence toggles the fenced state;
            # an even count of fences on the line opens and closes at once.
            if stripped.startswith("```") and stripped.count("```") % 2 == 1:
                inFence = not inFence
            tokens = line.split()
            # '#' lines inside a fenced code block are code, not headings.
            isHeading = (not inFence and line.startswith("#")
                         and tokens[0] in _HEADING_MARKERS)
            if not isHeading:
                if currentProject:
                    sectionLines.append(line)
                continue
            # Every heading ends the section that was being collected.
            _store_section(projectObjects, currentProject, date, sectionLines)
            sectionLines = []
            lastToken = tokens[-1]
            if tokens[0] == "##" and lastToken.isdigit() and len(lastToken) == 8:
                date = int(lastToken)
                started = True
                # Text between a date heading and the first project heading
                # belongs to no project.
                currentProject = ""
            elif tokens[0] == "###" and started and lastToken in projectObjects:
                currentProject = lastToken
            else:
                currentProject = ""
    # The last section of the file has no heading after it to close it.
    _store_section(projectObjects, currentProject, date, sectionLines)


def _write_compiled(projectNames, projectObjects, noteDir):
    """Write one '<name>.md' file into *noteDir* per project with entries."""
    if not os.path.exists(noteDir):
        print("Making the directory for compiled notebooks:\n - {}".format(noteDir))
        os.makedirs(noteDir)
    print("\nMaking the compiled notebook:")
    for name in projectNames:
        entries = projectObjects[name].entries
        if not entries:
            continue
        newName = "{}.md".format(name)
        print(" - {}".format(os.path.basename(newName)))
        with open(os.path.join(noteDir, newName), "w") as f:
            for date in sorted(entries):
                print("\n## {}".format(date), file=f)
                print(entries[date], file=f)


def main():
    """Compile one notebook per project from the files in 'notebooklist.txt'.

    Reads 'notebooklist.txt' from the current directory, gathers the project
    names declared by every listed notebook, collects the dated entries of
    each project and writes them to 'compiled_notebooks/<project>.md'.
    """
    # 1. get a list of all the tracked files in the current git repo.
    # NOTE: the list is not consumed yet; nothing below adds files to git.
    tracked = _list_tracked_files()

    # 2. get a list of all the files to look at to compile the notebook.
    notebooks = _read_notebook_list("notebooklist.txt")
    print("Compiling from these notebooks:")
    for each in notebooks:
        print(" - {}".format(each))
    print()

    # 3. get all of the projects from all the files, each name only once
    projectNames = []
    for each in notebooks:
        for name in get_project_list(each) or []:
            if name not in projectNames:
                projectNames.append(name)
    print("Looking for these projects:")
    for each in projectNames:
        print(" - {}".format(each))
    print()

    # 4. Make an object for each project
    projectObjects = {name: notebook(name) for name in projectNames}

    # 5. Loop through all the files, keeping track of the date, and hand
    #    every "### project" section to the matching project object.
    for each in notebooks:
        _collect_entries(each, projectObjects)

    # This compiles notebooks for things that have entries
    _write_compiled(projectNames, projectObjects, "compiled_notebooks")


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment