Skip to content

Instantly share code, notes, and snippets.

@StoneLabs
Last active April 29, 2026 10:41
Show Gist options
  • Select an option

  • Save StoneLabs/3a413a2d851f73aedbbbe177137853df to your computer and use it in GitHub Desktop.

Select an option

Save StoneLabs/3a413a2d851f73aedbbbe177137853df to your computer and use it in GitHub Desktop.
ODT to custom md
import re
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path
# Text injected into every empty paragraph before pandoc runs. The line
# "EMPTY_PARA_PLACEHOLDER\n" is stripped from the Markdown afterwards, which
# leaves an extra blank line where the empty paragraph was.
EMPTY_PARA_MARKER = 'EMPTY_PARA_PLACEHOLDER'

# Everything from the first occurrence of this text onwards is cut from the
# generated Markdown.
EOF_MARKER = 'EOFEOFEOF'

# Self-closing <text:p .../> elements only. The lookahead requires whitespace
# or "/" right after the element name so that other self-closing elements
# whose name merely starts with "text:p" (e.g. <text:page-number/>) are left
# untouched.
EMPTY_PARA_RE = re.compile(r'<text:p(?=[\s/])[^>]*/>')

EMPTY_PARA_REPLACEMENT = (
    '<text:p text:style-name="Text_20_Body">EMPTY_PARA_PLACEHOLDER</text:p>'
)


def patch_empty_paragraphs(content):
    """Replace every self-closing ``<text:p/>`` with a placeholder paragraph.

    Args:
        content: The text of an ODT ``content.xml``.

    Returns:
        A ``(patched_content, count)`` tuple, where ``count`` is the number
        of empty paragraphs that were replaced.
    """
    return EMPTY_PARA_RE.subn(EMPTY_PARA_REPLACEMENT, content)


def write_patched_odt(input_path, patched_path, content):
    """Copy the ODT archive, substituting ``content`` for ``content.xml``.

    Entries are written in their original order and each one reuses its
    original ``ZipInfo``, so per-entry metadata such as the compression
    method is carried over.

    Args:
        input_path: Path of the source ``.odt`` archive.
        patched_path: Path of the archive to create.
        content: New text for ``content.xml`` (stored as UTF-8).
    """
    with zipfile.ZipFile(input_path) as zin, \
            zipfile.ZipFile(patched_path, 'w') as zout:
        for item in zin.infolist():
            if item.filename == 'content.xml':
                zout.writestr(item, content.encode('utf-8'))
            else:
                zout.writestr(item, zin.read(item.filename))


def clean_markdown(markdown):
    """Remove the placeholder lines and cut the text at the EOF marker.

    Args:
        markdown: Raw Markdown produced by pandoc.

    Returns:
        A ``(output, truncated)`` tuple; ``truncated`` is True when the EOF
        marker was present and the text after it was dropped.
    """
    without_placeholders = markdown.replace(EMPTY_PARA_MARKER + '\n', '')
    head, marker, _ = without_placeholders.partition(EOF_MARKER)
    return head, bool(marker)


def main(argv=None):
    """Convert an ODT file to Markdown under ``md-export/``.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 on success, 1 on any reported error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print("Usage: python convert.py <input.odt>")
        return 1

    input_path = args[0]
    if not Path(input_path).exists():
        print(f"Error: file not found: {input_path}")
        return 1
    if Path(input_path).suffix.lower() != '.odt':
        print(f"Error: expected .odt file, got: {Path(input_path).suffix}")
        return 1

    print(f"Reading {input_path}...")
    try:
        with zipfile.ZipFile(input_path) as archive:
            content = archive.read('content.xml').decode('utf-8')
    except (zipfile.BadZipFile, KeyError) as exc:
        # BadZipFile: not a zip archive; KeyError: archive has no content.xml.
        print(f"Error: not a valid ODT file: {input_path} ({exc})")
        return 1

    content, empty_paras = patch_empty_paragraphs(content)
    print(f" -> Found {empty_paras} empty paragraph(s), patching...")

    # A private temporary directory avoids clashes between concurrent runs,
    # works on platforms without /tmp, and is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        patched = Path(tmp_dir) / 'input_patched.odt'
        print(f"Writing patched ODT to {patched}...")
        write_patched_odt(input_path, patched, content)

        print(f"Running pandoc on {patched}...")
        print(f" -> pandoc {patched} -t markdown-smart --wrap=none")
        try:
            result = subprocess.run(
                ['pandoc', str(patched), '-t', 'markdown-smart', '--wrap=none'],
                capture_output=True, text=True, encoding='utf-8',
            )
        except FileNotFoundError:
            print("Error: pandoc executable not found; is it installed and on PATH?")
            return 1

    if result.returncode != 0:
        print(f"Error: pandoc failed:\n{result.stderr}")
        return 1

    print(" -> removing patched markers")
    output, truncated = clean_markdown(result.stdout)
    if truncated:
        print(" -> Truncated output at eof marker")

    print("Outputting")
    output_dir = Path('md-export')
    print(f" -> Creating output folder {output_dir.resolve()}")
    output_dir.mkdir(exist_ok=True)
    output_path = output_dir / Path(input_path).with_suffix('.md').name
    print(f" -> Writing output to {output_path}...")
    # Explicit UTF-8 so the result does not depend on the locale encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(output)
    print(f"Done. {len(output.splitlines())} lines written to {output_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment