Last active
May 19, 2017 09:56
-
-
Save me-shaon/c81c115cdae82d8f4af3868486906025 to your computer and use it in GitHub Desktop.
GitHub Markdown fixer script in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
import glob | |
import re | |
import codecs | |
def fixItRalph(dirPath):
    """Repair common GitHub Markdown formatting problems under *dirPath*.

    Every ``*.md`` file found recursively below *dirPath* is rewritten in
    place, line by line, with two fixes applied:

    * Headers: ``#Title`` becomes ``# Title`` (a space is inserted when the
      leading ``#`` run is immediately followed by visible text).
    * Links: ``[text] (url)`` becomes ``[text](url)`` (whitespace between the
      closing bracket and the opening parenthesis is removed).

    Files are read and written as UTF-8.

    Args:
        dirPath: Root directory to search for Markdown files.

    Raises:
        OSError: If a file cannot be read, written or replaced.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    # Imported locally so the function does not depend on a module-level
    # import that the rest of the file does not already provide.
    import tempfile

    # (compiled pattern, replacement) pairs, compiled once rather than
    # looked up again for every line of every file.
    change_rules = [
        # Header fixer. `[^\s#].*` (rather than `[^ #].+`) also repairs
        # one-character headings such as "#A" and never touches a bare "#".
        (re.compile(r'^(#+)([^\s#].*)$', re.MULTILINE), r'\1 \2'),
        # URL link fixer.
        (re.compile(r'(\[.+?\])\s+(\(.*?\))', re.MULTILINE), r'\1\2'),
    ]

    for md_path in glob.glob(dirPath + "/**/*.md", recursive=True):
        # The glob can also match a directory whose name ends in ".md".
        if not os.path.isfile(md_path):
            continue

        # Create the scratch file next to the target: a unique name cannot
        # clobber a user's own file, and staying on the same filesystem
        # lets os.replace() swap it in atomically.
        target_dir = os.path.dirname(md_path) or os.curdir
        fd, tmp_path = tempfile.mkstemp(
            prefix='.mdfix-', suffix='.tmp', dir=target_dir)
        os.close(fd)
        try:
            with codecs.open(md_path, 'r', encoding='utf8') as fi, \
                    codecs.open(tmp_path, 'w', encoding='utf8') as fo:
                for line in fi:
                    for pattern, replacement in change_rules:
                        line = pattern.sub(replacement, line)
                    fo.write(line)
            # mkstemp creates the file as 0600; carry over the original's
            # permission bits so the rewrite does not restrict access.
            os.chmod(tmp_path, os.stat(md_path).st_mode & 0o7777)
            # Single-step swap: the original is never deleted before the
            # replacement is in position.
            os.replace(tmp_path, md_path)
        finally:
            # After a successful replace the temp name no longer exists;
            # on failure make sure no scratch file is left behind.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
if __name__ == "__main__":
    # Script entry point: the first command-line argument, when given, is
    # the root directory to process; otherwise fall back to the current
    # working directory.
    if len(sys.argv) > 1:
        dirPath = sys.argv[1]
    else:
        dirPath = os.getcwd()
    fixItRalph(dirPath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I wrote the first version of this code using Python's fileinput module to edit the files in 'inplace' mode. But that version did not work properly with Markdown files that contain Unicode characters.
Then I re-wrote it using the codecs module of python with the proper encoding.