Last active
July 24, 2022 06:19
-
-
Save edsu/7827831 to your computer and use it in GitHub Desktop.
Convert a WordPerfect file to messy text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
""" | |
For each character c, if its value is: | |
#13: treat as newline | |
#128, #160: treat as space ' ' | |
#169..#171, #173, #174: treat as dash '-' | |
#192..#236: skip ahead and ignore all characters until another occurrence | |
of character c is found; resume at the following character | |
#0..#31, #129..#159, #161..#168, #172, #175..#255: ignore (control characters) | |
else treat as regular text character | |
from http://fileformats.archiveteam.org/wiki/WordPerfect | |
""" | |
def wpd2txt(filename):
    """Extract rough plain text from a WordPerfect file.

    The file is read as raw bytes and each byte value is mapped as follows:

    * 13 becomes a newline.
    * 128 and 160 become a space.
    * 169..171, 173 and 174 become a dash.
    * 192..236 start a skipped span: every byte up to and including the
      next occurrence of the same value is dropped, and processing resumes
      with the byte after it. A span that is never closed swallows the
      rest of the file.
    * 32..127 are kept as regular text characters.
    * Every other value (control and function codes) is dropped.

    Args:
        filename: path of the WordPerfect file to read.

    Returns:
        The extracted text as a ``str``.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # bytearray yields integer byte values when iterated on both Python 2
    # and Python 3, so no ord() call is needed; `with` closes the handle.
    with open(filename, 'rb') as fh:
        data = bytearray(fh.read())

    pieces = []
    skip = None  # byte value that terminates the span currently skipped
    for n in data:
        if skip is not None:
            # Inside a skipped span: drop everything, including the
            # closing byte, which ends the span.
            if n == skip:
                skip = None
            continue
        if n == 13:
            pieces.append("\n")
        elif n in (128, 160):
            pieces.append(' ')
        elif n in (169, 170, 171, 173, 174):
            pieces.append('-')
        elif 192 <= n <= 236:
            skip = n
        elif 32 <= n <= 127:
            pieces.append(chr(n))
        # Remaining values (0..31, 129..159, 161..168, 172, 175..255) are
        # control characters and are ignored.
    return ''.join(pieces)
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the path of
    # the WordPerfect file, and prints the extracted text to stdout.
    if len(sys.argv) != 2:
        # Single-argument print(...) behaves the same on Python 2 and 3,
        # whereas the print statement is a SyntaxError on Python 3.
        print("usage wpd2txt.py <filename>")
        sys.exit(1)
    filename = sys.argv[1]
    print(wpd2txt(filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment