Skip to content

Instantly share code, notes, and snippets.

@bgilbert
Created February 19, 2012 00:00
Show Gist options
  • Save bgilbert/1861361 to your computer and use it in GitHub Desktop.
Save bgilbert/1861361 to your computer and use it in GitHub Desktop.
Script to dump a histogram of the low bytes of MIRAX position map coordinates
#!/usr/bin/python
#
# OpenSlide, a library for reading whole slide image files
#
# Copyright (c) 2007-2012 Carnegie Mellon University
# Copyright (c) 2011 Google, Inc.
# All rights reserved.
#
# OpenSlide is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, version 2.1.
#
# OpenSlide is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with OpenSlide. If not, see
# <http://www.gnu.org/licenses/>.
#
from __future__ import division
from ConfigParser import RawConfigParser, NoOptionError
import io
import os
import struct
import sys
class SlidedatHierarchy(object):
    """Index of the layers declared by one hierarchy in Slidedat.ini.

    Subclasses provide the key templates (LAYER_COUNT_KEY, LAYER_NAME_KEY,
    LAYER_SECTION_KEY, LEVEL_COUNT_KEY, LEVEL_NAME_KEY, LEVEL_SECTION_KEY)
    that are looked up in the SECTION section of the parsed file.
    """

    # Slidedat.ini section that holds the hierarchy description.
    SECTION = 'HIERARCHICAL'

    def __init__(self, dat):
        """Create one HierLayer for every layer listed in *dat*.

        dat -- parsed Slidedat.ini exposing get() and getint().
        """
        # Counter handed out by next_offset(); every level takes one value.
        self._next_offset = 0
        self._by_name = {}
        layer_count = dat.getint(self.SECTION, self.LAYER_COUNT_KEY)
        for index in range(layer_count):
            entry = HierLayer(self, dat, index)
            self._by_name[entry.name] = entry

    def get_layer_by_name(self, name):
        """Return the layer called *name*; raise KeyError if there is none."""
        return self._by_name[name]

    def next_offset(self):
        """Return the current offset counter, then advance it by one."""
        current = self._next_offset
        self._next_offset = current + 1
        return current
class NonHierTree(SlidedatHierarchy):
    """Key templates for the NONHIER_* entries of Slidedat.ini.

    Each %d placeholder is filled with the layer index and, in the
    per-level templates, the level index as the second value.
    """

    # Number of layers in the tree.
    LAYER_COUNT_KEY = 'NONHIER_COUNT'

    # Per-layer keys (formatted with the layer index).
    LAYER_NAME_KEY = 'NONHIER_%d_NAME'
    LAYER_SECTION_KEY = 'NONHIER_%d_SECTION'
    LEVEL_COUNT_KEY = 'NONHIER_%d_COUNT'

    # Per-level keys (formatted with the layer index and the level index).
    LEVEL_NAME_KEY = 'NONHIER_%d_VAL_%d'
    LEVEL_SECTION_KEY = 'NONHIER_%d_VAL_%d_SECTION'
class HierLayer(object):
    """One layer of a Slidedat hierarchy together with its levels."""

    def __init__(self, h, dat, layer_id):
        """Read layer *layer_id* of hierarchy *h* from the parsed file *dat*.

        h        -- hierarchy supplying SECTION and the key templates.
        dat      -- parsed Slidedat.ini exposing get() and getint().
        layer_id -- zero-based index of the layer within the hierarchy.
        """
        section = h.SECTION
        self.name = dat.get(section, h.LAYER_NAME_KEY % layer_id)
        self.section = dat.get(section, h.LAYER_SECTION_KEY % layer_id)
        self.levels = dat.getint(section, h.LEVEL_COUNT_KEY % layer_id)
        # Levels are created in index order so that list position == level id.
        self._by_id = [HierLevel(h, dat, layer_id, index)
                       for index in range(self.levels)]
        # With duplicate names the later level wins.
        self._by_name = dict((level.name, level) for level in self._by_id)

    def get_level_by_name(self, name):
        """Return the level called *name*; raise KeyError if there is none."""
        return self._by_name[name]

    def get_level_by_id(self, id):
        """Return the level stored at list index *id*."""
        return self._by_id[id]
class HierLevel(object):
    """One level of a layer: its name, its section and its offset."""

    def __init__(self, h, dat, layer_id, level_id):
        """Read level *level_id* of layer *layer_id* from *dat*.

        h   -- hierarchy supplying SECTION, the LEVEL_* key templates and
               next_offset().
        dat -- parsed Slidedat.ini exposing get().
        """
        ids = (layer_id, level_id)
        self.name = dat.get(h.SECTION, h.LEVEL_NAME_KEY % ids)
        self.section = dat.get(h.SECTION, h.LEVEL_SECTION_KEY % ids)
        # Each level takes the next value of the hierarchy-wide counter.
        self.offset = h.next_offset()
def read_len(f, size):
    """Read exactly *size* bytes from *f* and return them.

    Fails an assertion if the file yields a different number of bytes.
    """
    data = f.read(size)
    assert len(data) == size
    return data
def read_int32(f):
    """Read four bytes from *f* and return them as a little-endian int32.

    Fails an assertion if fewer than four bytes are available.
    """
    raw = f.read(4)
    assert len(raw) == 4
    (value,) = struct.unpack('<i', raw)
    return value
def assert_int32(f, value):
    """Consume one int32 from *f* and assert that it equals *value*."""
    # Read outside the assert so the file position advances even under -O.
    actual = read_int32(f)
    assert actual == value
def read_nonhier_record(f, root_position, record):
    """Follow the index pointers for one non-hierarchical record.

    f             -- index file positioned anywhere; it is seeked as needed.
    root_position -- file offset holding the pointer to the record table.
    record        -- index of the wanted entry in that table.

    Returns a (fileno, position, size) tuple read from the data page.
    Fails an assertion if the list head or page prologue is not as expected.
    """
    # Record table: one 4-byte pointer per record.
    f.seek(root_position)
    f.seek(read_int32(f) + record * 4)

    # The table entry points at the list head.
    f.seek(read_int32(f))

    # The list head is a zero followed by the pointer to the data page.
    assert_int32(f, 0)
    f.seek(read_int32(f))

    # Page prologue: page size of 1, then three zero words.
    for expected in (1, 0, 0, 0):
        assert_int32(f, expected)

    # Payload, in on-disk order.
    data_position = read_int32(f)
    data_size = read_int32(f)
    data_fileno = read_int32(f)
    return (data_fileno, data_position, data_size)
def read_tile_position_map(image_divisions, tiles_x, f, offset, len):
    """Print histograms of the low bytes of the position map coordinates.

    The map starts at byte *offset* of *f* and is *len* bytes long.  It is
    a sequence of 9-byte records: two little-endian int32 values followed
    by one byte that is skipped.  For each of the two values the number of
    occurrences of its low byte is counted, and both histograms are printed
    as dicts (' X:' for the first value, ' Y:' for the second).

    image_divisions, tiles_x -- accepted for interface compatibility; they
                                do not influence the output.
    f      -- seekable file object returning byte strings.
    offset -- file offset of the first record.
    len    -- size of the map in bytes (this name shadows the builtin, so
              the builtin len() is not available in this function).

    Raises AssertionError if *len* is not a multiple of the record size or
    if the file ends before all records have been read.
    """
    # Two int32 values plus one pad byte; '<' disables alignment padding.
    record = struct.Struct('<iix')
    assert(len % record.size == 0)
    f.seek(offset)
    dist_x = {}
    dist_y = {}
    for _ in range(len // record.size):
        try:
            x, y = record.unpack(f.read(record.size))
        except struct.error:
            # Short read: report it the same way the length check does.
            raise AssertionError('truncated position map record')
        low_x = x & 0xff
        low_y = y & 0xff
        dist_x[low_x] = dist_x.get(low_x, 0) + 1
        dist_y[low_y] = dist_y.get(low_y, 0) + 1
    # Single formatted argument gives identical output on Python 2 and 3.
    print(' X: %s' % (dist_x,))
    print(' Y: %s' % (dist_y,))
def dump_mirax(path):
    """Print the position map histograms of the MIRAX slide at *path*.

    *path* must end in '.mrxs'.  The slide's data directory is *path* with
    that extension removed; Slidedat.ini, the index file and the data files
    are all read from there.

    Prints ' No position map' and returns if Slidedat.ini does not list a
    'VIMSLIDE_POSITION_BUFFER' layer with a 'default' level.

    Raises Exception if *path* does not have the '.mrxs' extension.  Errors
    from missing files, missing configuration keys or malformed index data
    propagate to the caller.
    """
    dirname, ext = os.path.splitext(path)
    if ext != '.mrxs':
        raise Exception('Not a MIRAX file: %s' % path)

    # Parse Slidedat.ini; 'utf-8-sig' drops a leading byte order mark.
    dat = RawConfigParser()
    with io.open(os.path.join(dirname, 'Slidedat.ini'),
                 encoding='utf-8-sig') as f:
        dat.readfp(f)

    tiles_x = dat.getint('GENERAL', 'IMAGENUMBER_X')
    slide_id = dat.get('GENERAL', 'SLIDE_ID')
    try:
        image_divisions = dat.getint('GENERAL', 'CameraImageDivisionsPerSide')
    except NoOptionError:
        image_divisions = 1
    datafiles = [os.path.join(dirname, dat.get('DATAFILE', 'FILE_%d' % i))
                 for i in range(dat.getint('DATAFILE', 'FILE_COUNT'))]

    # Locate the position map among the non-hierarchical records.
    try:
        position_layer = NonHierTree(dat).get_layer_by_name(
            'VIMSLIDE_POSITION_BUFFER')
        position_offset = position_layer.get_level_by_name('default').offset
    except KeyError:
        print(' No position map')
        return

    # Parse the index file.  It is binary data, so open it in binary mode
    # to avoid newline translation on platforms that perform it.
    index_path = os.path.join(dirname, dat.get('HIERARCHICAL', 'INDEXFILE'))
    with open(index_path, 'rb') as index:
        read_len(index, 5)              # skip the 5-byte version field
        read_len(index, len(slide_id))  # skip the embedded slide ID
        # The non-hierarchical root pointer is the second int32 after the ID.
        nonhier_root = index.tell() + 4
        fileno, position, size = read_nonhier_record(index, nonhier_root,
                                                     position_offset)

    # Print the tile position map stored in the referenced data file.
    with open(datafiles[fileno], 'rb') as datafile:
        read_tile_position_map(image_divisions, tiles_x,
                               datafile, position, size)
if __name__ == '__main__':
    # Dump every slide named on the command line.  A failure on one slide
    # is printed and does not prevent the remaining slides from being read.
    for slide_path in sys.argv[1:]:
        print(slide_path)
        try:
            dump_mirax(slide_path)
        except Exception as err:
            print(err)
        # Blank line between slides.
        print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment