Last active
June 13, 2021 20:49
-
-
Save el-hult/90f7495907f8edaa1783db23151e4e36 to your computer and use it in GitHub Desktop.
Code for converting a CALS raster file to TIFF. Using vanilla Python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Python script that takes a folder and, for each CALS Raster file in it, converts that file to a TIFF file.
https://en.wikipedia.org/wiki/CALS_Raster_file_format
CALS Raster files are recognised by the file ending ".cal".

It so happens that TIFF can be compressed with Group 4 compression (as in faxes), and that is the compression format of CALS Raster images Type 1.
If the image is rotated, one must fix that.
If it is a CALS Type 2 image, it is a tiled image and one must decompress each tile on its own, and I don't think that TIFF handles that.
In such a case, you need to do some more work. But I didn't need to think more about such problems.

Ludvig Hult
2021-06-13
""" | |
import os | |
import struct | |
import argparse | |
def parse_cals(data):
    """Read a CALS file, unpack part of the header, return the parsed header and the data block.

    The header occupies the first 0x800 bytes and is read as sixteen
    128-byte text records. The first ten records are parsed as
    ``key: value`` pairs; the free-text notes are read from 0x507 up to the
    end of the header. Everything from 0x800 onwards is returned untouched.

    The quick data format description is from here
    http://support.ricoh.com/bb_v1oi/pub_e/oi_view/0001060/0001060558/view/rpgl_rtiff/int/0192.htm

    Written by Ludvig Hult 2021-06-13

    Args:
        data: complete contents of the CALS file, as bytes.

    Returns:
        A tuple ``(header, binary_data)``. ``header`` is a dict of the first
        ten header records plus ``'notes'``; values are stripped strings, or
        None where the file says ``NONE``. Four entries are converted:
        ``'rtype'`` and ``'rdensty'`` to int, ``'rpelcnt'`` and ``'rorient'``
        to tuples of ints. ``binary_data`` is the bytes after the header.

    Raises:
        ValueError: if one of the first ten records has no ``:`` separator
            (for example because the input is truncated), or a numeric
            field does not parse as an integer.
        KeyError: if one of the four converted fields is missing.
        TypeError, AttributeError: if a converted field is ``NONE``.
    """
    record_len = 0x80
    parsed_record_count = 10
    notes_start = 0x507  # record at 0x500, minus its 7-byte "notes: " prefix
    data_block_start = 0x800

    # "ANSI" is only registered as a codec on Windows (alias of "mbcs"), so
    # the original call raised LookupError everywhere else. cp1252 is the
    # usual Western "ANSI" code page; errors="replace" keeps decoding from
    # failing on the few byte values cp1252 leaves undefined.
    # NOTE(review): header text is expected to be plain ASCII in practice,
    # in which case the codec choice makes no difference -- confirm.
    def decode_text(raw):
        return raw.decode("cp1252", errors="replace")

    def none_if_placeholder(text):
        return None if text == "NONE" else text

    header = {}
    for index in range(parsed_record_count):
        start = index * record_len
        record = decode_text(data[start:start + record_len])
        key, separator, value = record.partition(":")
        if not separator:
            raise ValueError(
                f"malformed CALS header record {index} at offset {start:#x}: {record!r}"
            )
        header[key] = none_if_placeholder(value.strip())

    header['notes'] = none_if_placeholder(
        decode_text(data[notes_start:data_block_start]).strip()
    )
    header['rtype'] = int(header['rtype'])
    header['rdensty'] = int(header['rdensty'])
    header['rpelcnt'] = tuple(int(part) for part in header['rpelcnt'].split(","))
    header['rorient'] = tuple(int(part) for part in header['rorient'].split(","))

    return header, data[data_block_start:]
##### | |
# These functions are from https://shreevatsa.github.io/site/ccitt.html | |
####### | |
def tiff_header_for_CCITT(width, height, img_size, CCITT_group=4, blackIsZero=False):
    """Return a TIFF header that turns one strip of CCITT data into a valid TIFF file.

    The header is little-endian and holds a single image file directory
    (IFD) with eight entries. It declares the image data as one strip that
    starts immediately after the header, so the caller only has to append
    the compressed bytes.

    Args:
        width: image width in pixels.
        height: image height in pixels; also used as RowsPerStrip.
        img_size: number of bytes of compressed image data that will follow.
        CCITT_group: value for the TIFF Compression tag (4 = CCITT Group 4).
        blackIsZero: written to PhotometricInterpretation as ``int(blackIsZero)``
            (0 = WhiteIsZero, 1 = BlackIsZero).

    Returns:
        The header as bytes.

    Raises:
        struct.error: if a value does not fit its unsigned field.
    """
    ifd_entry = 'HHLL'  # tag, field type, value count, value (or offset)
    tiff_header_struct = (
        '<'      # little-endian, standard sizes, no padding
        + '2s'   # byte-order mark
        + 'H'    # version number
        + 'L'    # offset of the first IFD
        + 'H'    # number of IFD entries
        + ifd_entry * 8
        + 'L'    # offset of the next IFD: 4 bytes per TIFF 6.0, not 2
    )
    # The image data is appended directly after the header.
    strip_offset = struct.calcsize(tiff_header_struct)
    return struct.pack(
        tiff_header_struct,
        b'II',  # Byte order indication: Little-endian
        42,  # Version number (always 42)
        8,  # Offset to first IFD
        8,  # Number of tags in IFD
        256, 4, 1, width,  # ImageWidth, LONG, 1, width
        257, 4, 1, height,  # ImageLength, LONG, 1, length
        258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
        259, 3, 1, CCITT_group,  # Compression, SHORT, 1, 4 = CCITT Group 4 fax encoding
        262, 3, 1, int(blackIsZero),  # PhotometricInterpretation, SHORT, 1, 0 = WhiteIsZero
        273, 4, 1, strip_offset,  # StripOffsets, LONG, 1, len of header
        278, 4, 1, height,  # RowsPerStrip, LONG, 1, length
        279, 4, 1, img_size,  # StripByteCounts, LONG, 1, size of image
        0,  # Offset of next IFD: 0 = this is the last IFD
    )
def decode_ccitt_data(data, width, height, CCITT_group=4, blackIsZero=False):
    """Wrap CCITT-encoded data in a TIFF container, given its width, height, etc.

    Despite the name, nothing is decompressed here: a TIFF header describing
    the data is prepended, and decoding is left to whichever TIFF reader
    opens the result.

    Args:
        data: the CCITT-compressed image bytes.
        width: image width in pixels.
        height: image height in pixels.
        CCITT_group: value for the TIFF Compression tag (4 = CCITT Group 4).
        blackIsZero: forwarded to the header builder, which writes it into
            the PhotometricInterpretation tag.

    Returns:
        The complete TIFF file contents as bytes.
    """
    # blackIsZero used to be dropped here, so callers asking for it silently
    # got the header builder's default instead.
    tiff_header = tiff_header_for_CCITT(width, height, len(data), CCITT_group, blackIsZero)
    return tiff_header + data
################################################################################################### | |
def main():
    """Convert every ``.cal`` file in ``--indir`` into a ``.tiff`` file in ``--outdir``.

    Only file names ending in exactly ``.cal`` are picked up, and
    sub-directories are not searched. Each output file keeps the input's
    base name. The output directory must already exist.

    Raises:
        ValueError: if a file is not CALS type 1, or its orientation is not
            ``(0, 270)``. Conversion stops at the first such file.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--indir", help="input dir (N.B. does not recurse)", required=True)
    p.add_argument("--outdir", help="output dir (must exist before hand)", required=True)
    args = p.parse_args()

    cal_names = [name for name in os.listdir(args.indir) if name.endswith(".cal")]
    for fname in cal_names:
        base = fname[:-4]  # drop the ".cal" suffix
        in_path = os.path.join(args.indir, fname)
        out_path = os.path.join(args.outdir, f"{base}.tiff")

        with open(in_path, 'rb') as cals_file:
            header, data = parse_cals(cals_file.read())

        # Explicit checks instead of assert: asserts are stripped under
        # "python -O", which would let unsupported files be written out as
        # broken TIFFs.
        if header['rtype'] != 1:
            raise ValueError(f"{in_path}: I only deal with type 1")
        if header['rorient'] != (0, 270):
            raise ValueError(f"{in_path}: I only deal with simple orientation")

        width, height = header['rpelcnt']
        # NOTE(review): blackIsZero=True requests PhotometricInterpretation =
        # BlackIsZero; confirm the resulting image polarity is the intended one.
        tiff_bytes = decode_ccitt_data(data, width, height, CCITT_group=4, blackIsZero=True)
        with open(out_path, 'wb') as tiff_file:
            tiff_file.write(tiff_bytes)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment