Created
July 10, 2017 20:06
-
-
Save wphicks/991a085174b45720a4255987c290546d to your computer and use it in GitHub Desktop.
A basic command-line tool for determining the word count of Open Document Text (.odt and .fodt) files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
import argparse | |
import zipfile | |
import shutil | |
import tempfile | |
import xml.etree.ElementTree as ET | |
def count_fodt(filename):
    """Return the number of whitespace-separated words in an ODF XML document.

    The document is parsed with ElementTree. When the root element has an
    ``office:body`` child (as flat ``.fodt`` documents and the ``content.xml``
    of an ``.odt`` package do), only text inside that body is counted, so
    metadata, settings and style definitions stored alongside the body do
    not inflate the result. XML without such a child is counted in full.

    Args:
        filename: Path of the XML file, or an open file object; it is passed
            unchanged to ``xml.etree.ElementTree.parse``.

    Returns:
        The total word count as an ``int``.

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
        OSError: If a path is given and the file cannot be opened.
    """
    office_ns = "urn:oasis:names:tc:opendocument:xmlns:office:1.0"
    root = ET.parse(filename).getroot()
    # office:body is a direct child of the document root; everything else at
    # that level (meta, settings, styles, font declarations) is not content.
    body = root.find("{%s}body" % office_ns)
    scope = root if body is None else body
    return sum(len(text.split()) for text in scope.itertext())
def count_odt(filename):
    """Return the word count of a zipped Open Document Text (.odt) file.

    The package's ``content.xml`` member is opened inside the archive and
    handed to ``count_fodt`` as a file object. No temporary copy is made:
    reopening a ``NamedTemporaryFile`` by name while it is still open does
    not work on every platform (notably Windows), and ElementTree can parse
    directly from the archive member stream.

    Args:
        filename: Path (or file object) of the ``.odt`` archive, as accepted
            by ``zipfile.ZipFile``.

    Returns:
        The word count reported by ``count_fodt`` for ``content.xml``.

    Raises:
        zipfile.BadZipFile: If the file is not a valid zip archive.
        KeyError: If the archive has no ``content.xml`` member.
    """
    with zipfile.ZipFile(filename) as odt_file:
        with odt_file.open("content.xml") as content_file:
            # The member stream is a readable binary file object, which
            # ElementTree.parse accepts in place of a file name.
            return count_fodt(content_file)
if __name__ == "__main__":
    # Command-line interface: a single positional argument naming the document.
    cli = argparse.ArgumentParser(
        description='Count words in Open Document text file'
    )
    cli.add_argument("filename", help="Name of odt or fodt file")

    # Invoked with no arguments at all: show the full help text and exit
    # with status 1 instead of argparse's terse usage error.
    if len(sys.argv) == 1:
        cli.print_help()
        sys.exit(1)

    options = cli.parse_args()

    # A ".fodt" extension (case-insensitive) selects the flat-XML counter;
    # every other file name is treated as a zipped .odt package.
    _, extension = os.path.splitext(options.filename)
    counter = count_fodt if extension.lower() == ".fodt" else count_odt
    print(counter(options.filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment