Created
June 10, 2020 14:43
-
-
Save geekman/f75f57f848c3ac2ccc98a433db40eb2a to your computer and use it in GitHub Desktop.
script to fix up blog post markdown files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # | |
| # rewrite blog posts' front matter: convert dates to YYYY-MM-DD HH:MM:SS and quote titles that contain a colon | |
| # 2020.05.26 darell tan | |
| # | |
| import re | |
| import os | |
| import sys | |
| import time | |
# Date format the script converts from, e.g. "26.05.2020, 14:43".
OLD_FMT = '%d.%m.%Y, %H:%M'
# Date format written back into the front matter.
NEW_FMT = '%Y-%m-%d %H:%M:%S'
# Date-only format that is also accepted as "already converted".
NEW_FMT2 = '%Y-%m-%d'
# Matches the trailing whitespace of a line, including its line terminator.
EOL_RE = re.compile(r'\s+$')
def date_can_parse(fmt, s):
    """Return True if *s* parses as a date/time under the format *fmt*.

    Args:
        fmt: A ``time.strptime`` format string.
        s: The candidate date string.

    Returns:
        True when ``time.strptime(s, fmt)`` succeeds, False otherwise.
    """
    try:
        time.strptime(s, fmt)
    except (ValueError, TypeError):
        # ValueError: the text does not match the format.
        # TypeError: s is not a string at all (e.g. None).
        # Anything else (KeyboardInterrupt, ...) is allowed to propagate.
        return False
    return True
def is_quoted(s):
    """Return True if *s*, ignoring surrounding whitespace, is quoted.

    A string counts as quoted when it starts and ends with the same quote
    character (either ``"`` or ``'``) and is at least two characters long,
    so a lone quote character or an empty string is not considered quoted.

    Args:
        s: The string to inspect.

    Returns:
        bool: whether the stripped string is wrapped in matching quotes.
    """
    s = s.strip()
    # The length check avoids indexing into an empty string and keeps a
    # single quote character from matching itself as both opener and closer.
    return len(s) >= 2 and s[0] == s[-1] and s[0] in '"\''
def quotestr(s):
    """Return *s* wrapped in quotes when it needs quoting, else unchanged.

    Rules, in order:
      * empty/whitespace-only or already quoted -> returned as is
      * contains both quote kinds -> single-quoted, with every embedded
        single quote doubled
      * contains only double quotes -> single-quoted
      * contains only single quotes -> double-quoted
      * contains a colon -> double-quoted
      * anything else -> returned as is

    Args:
        s: The raw value text.

    Returns:
        The (possibly quoted) string; never None.
    """
    # Nothing to quote; also avoids probing the first character of an
    # empty string.
    if not s.strip() or is_quoted(s):
        return s

    has_dq = '"' in s
    has_sq = "'" in s

    if has_dq and has_sq:
        # Doubling the single quote is presumably the escape the front
        # matter parser expects inside a single-quoted value (as in YAML).
        return "'%s'" % s.replace("'", "''")
    if has_dq:
        return "'%s'" % s
    if has_sq:
        return '"%s"' % s

    # if it has funny chars, use double
    if ':' in s:
        return '"%s"' % s

    # No quoting required: hand the value back instead of falling through
    # and implicitly returning None.
    return s
def rewrite_file(fqn):
    """Rewrite the front matter of the file *fqn* in place, if needed.

    The file is only considered when its first line contains ``---``.
    Within the front matter (up to the next line starting with ``---``):

      * a ``date`` value in ``OLD_FMT`` is rewritten in ``NEW_FMT``;
        values already in ``NEW_FMT`` or ``NEW_FMT2`` are left alone
      * a ``title`` value that contains a colon and is not yet quoted is
        quoted via ``quotestr``

    All other lines are copied through untouched, and the file is written
    back only when at least one line changed.

    Args:
        fqn: Path of the file to process. It is read and written as UTF-8.

    Raises:
        ValueError: a ``date`` value matches none of the known formats.
        UnicodeDecodeError: the file is not valid UTF-8.
    """
    # Local import: io.open accepts encoding/newline on Python 2 and 3 alike.
    import io

    changed = False
    contents = []

    # newline='' leaves line terminators untranslated, so untouched lines
    # are written back exactly as they were read.
    with io.open(fqn, 'r', encoding='utf-8', newline='') as f:
        print(fqn)

        # An empty file has no first line; treat it as "no front matter".
        start = next(f, '')
        if '---' not in start:
            return
        contents.append(start)

        for line in f:
            contents.append(line)
            stripped = line.strip()

            # end of front matter
            if stripped.startswith('---'):
                break

            # parse front matter k-v pairs; lines without a colon (blank
            # lines, list items, ...) are not k-v pairs and pass through
            k, sep, v = stripped.partition(':')
            if not sep:
                continue
            k = k.strip()
            v = v.strip()

            # Keep the original indentation and trailing whitespace /
            # line terminator around the rewritten "key: value" text.
            lead = line[:len(line) - len(line.lstrip())]
            m = EOL_RE.search(line)
            tail = m.group(0) if m else ''

            if k == 'date':
                try:
                    t = time.strptime(v, OLD_FMT)
                except ValueError:
                    # new date format already, no need changes
                    if date_can_parse(NEW_FMT, v) or date_can_parse(NEW_FMT2, v):
                        continue
                    # unable to parse
                    raise
                t2 = time.strftime(NEW_FMT, t)
                contents[-1] = '%s%s: %s%s' % (lead, k, t2, tail)
                changed = True
            elif k == 'title' and ':' in v and not is_quoted(v):
                contents[-1] = '%s%s: %s%s' % (lead, k, quotestr(v), tail)
                changed = True

        # put remaining file contents
        contents.extend(f)

    # write the file back
    if changed:
        with io.open(fqn, 'w', encoding='utf-8', newline='') as f:
            f.writelines(contents)
# Script entry: rewrite every .md file below the directory given as the
# first command-line argument.
# Validate explicitly: an assert would be stripped under -O, and a missing
# argument would otherwise surface as a bare IndexError.
if len(sys.argv) < 2 or not os.path.isdir(sys.argv[1]):
    sys.exit('usage: %s <directory>' % sys.argv[0])

for root, _, files in os.walk(sys.argv[1]):
    for fn in files:
        if not fn.endswith('.md'):
            continue
        fqn = os.path.join(root, fn)
        try:
            rewrite_file(fqn)
        except Exception:
            # Name the offending file, then let the error abort the run.
            print('error processing file ' + fqn)
            raise
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment