Skip to content

Instantly share code, notes, and snippets.

@lastlegion
Last active May 1, 2017 18:35
Show Gist options
  • Save lastlegion/a6eacd2f16ce579a474f0890ac7af3b1 to your computer and use it in GitHub Desktop.
Save lastlegion/a6eacd2f16ce579a474f0890ac7af3b1 to your computer and use it in GitHub Desktop.
metadataextractor.py
import hashlib
import time
import openslide
class MetadataExtractor:
    """Assemble an upload payload for a slide image.

    The payload merges two sources:

    * ``fileMetadata`` -- a mapping supplied by the caller (the original
      code marks ``file-location`` as coming from a CSV).  This class reads
      the keys ``file-location``, ``study_id`` and ``id`` from it.
    * ``imageMetadata`` -- the object returned by ``openslide.OpenSlide``
      for ``file-location``, or ``[]`` when the slide could not be opened.

    The slide handle is kept on the instance and is never closed by this
    class.
    # NOTE(review): callers that process many slides presumably need to call
    # ``imageMetadata.close()`` themselves -- confirm against the call sites.
    """

    # Keys that createPayLoad() tries to populate, in processing order.
    PROPERTIES = [
        "objective",
        "mpp-x",
        "mpp-y",
        "vendor",
        "width",
        "height",
        "level_count",
        "timestamp",
        "md5sum",
        "file-location",  # From CSV
        "case_id",
        "study_id",
    ]

    # Value used for mpp_x / mpp_y when the slide does not provide one.
    DEFAULT_MPP = 0.25

    # Slide property names to try, in priority order, for each numeric field.
    # The first name present in the slide's properties wins.
    # "openslide.objective-power" is OpenSlide's standard magnification key;
    # "openslide.objective" is kept only for backward compatibility.
    # NOTE(review): tiff.XResolution / tiff.YResolution are TIFF resolution
    # tags rather than microns-per-pixel values; they are kept as a last
    # resort exactly as in the original code -- confirm this is intended.
    _NUMERIC_SOURCES = {
        "objective": (
            "openslide.objective",
            "openslide.objective-power",
            "aperio.AppMag",
        ),
        "mpp-x": ("openslide.mpp-x", "tiff.XResolution"),
        "mpp-y": ("openslide.mpp-y", "tiff.YResolution"),
    }

    # Class-level default kept for backward compatibility; __init__ always
    # rebinds it per instance, so the shared dict is never mutated here.
    fileMetadata = {}

    def __init__(self, fileMetadata):
        """Store *fileMetadata* and immediately try to open the slide.

        Args:
            fileMetadata: mapping that must contain ``file-location``;
                ``study_id`` and ``id`` are read later by createPayLoad().

        Raises:
            KeyError: if ``file-location`` is missing from *fileMetadata*.
        """
        self.fileMetadata = fileMetadata
        self.imageMetadata = self.extractImageMetadata()
        if not self.imageMetadata:
            print("Couldn't fetch image metadata")

    def extractImageMetadata(self):
        """Open the slide at ``fileMetadata['file-location']``.

        Returns:
            The ``openslide.OpenSlide`` object on success, or ``[]`` when
            opening fails (best effort: the failure is printed, not raised).
        """
        fileLocation = self.fileMetadata['file-location']
        try:
            return openslide.OpenSlide(fileLocation)
        except Exception:
            # Deliberately broad: any failure to open means "no metadata".
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print("Couldn't read " + str(fileLocation))
            return []

    def generateMD5Checksum(self, fileName):
        """Return the hex MD5 digest of the file at *fileName*.

        The file is read in 1 MiB chunks so large slides are not loaded
        into memory at once.
        """
        digest = hashlib.md5()
        blocksize = 2 ** 20
        with open(fileName, "rb") as f:
            for chunk in iter(lambda: f.read(blocksize), b""):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def _firstNumericProperty(properties, candidateKeys):
        """Return ``float(properties[key])`` for the first key present, else None."""
        for key in candidateKeys:
            if key in properties:
                return float(properties[key])
        return None

    def createPayLoad(self):
        """Build the payload dict from file and slide metadata.

        Returns:
            ``{}`` when the slide could not be opened.  Otherwise a dict with
            the entries of ``PROPERTIES`` that could be resolved, plus the
            aliases ``mpp_x``, ``mpp_y`` (each falling back to
            ``DEFAULT_MPP`` independently), ``subject_id`` (copy of
            ``case_id``) and ``filename`` (copy of ``file-location``).

        Raises:
            KeyError: if ``fileMetadata`` lacks ``study_id`` / ``id``, or the
                slide lacks the vendor or level-0 width/height properties.
            ValueError: if a numeric slide property cannot be converted.
        """
        fileMetadata = self.fileMetadata
        imageMetadata = self.imageMetadata

        # Without a readable slide there is nothing to report; bail out
        # before the alias step, which requires "case_id" to exist.
        if not imageMetadata:
            return {}

        properties = imageMetadata.properties
        payLoad = {}

        for prop in self.PROPERTIES:
            if prop in ("file-location", "filename"):
                payLoad[prop] = fileMetadata['file-location']
            elif prop == "study_id":
                payLoad[prop] = fileMetadata['study_id']
            elif prop == "case_id":
                payLoad[prop] = fileMetadata['id']
            elif prop in self._NUMERIC_SOURCES:
                value = self._firstNumericProperty(
                    properties, self._NUMERIC_SOURCES[prop]
                )
                if value is None:
                    # Dump what the slide does offer to help diagnose the gap.
                    print(properties)
                else:
                    payLoad[prop] = value
            elif prop in ("height", "width"):
                payLoad[prop] = int(properties["openslide.level[0]." + prop])
            elif prop == "vendor":
                payLoad[prop] = properties["openslide." + prop]
            elif prop == "md5sum":
                payLoad[prop] = self.generateMD5Checksum(
                    fileMetadata['file-location']
                )
            elif prop == "level_count":
                payLoad[prop] = int(imageMetadata.level_count)
            elif prop == "timestamp":
                payLoad[prop] = time.time()
            else:
                print("Couldn't handle: " + prop)

        # Underscore aliases; each axis falls back on its own so a slide
        # exposing only one of the two values no longer raises KeyError.
        payLoad["mpp_x"] = payLoad.get("mpp-x", self.DEFAULT_MPP)
        payLoad["mpp_y"] = payLoad.get("mpp-y", self.DEFAULT_MPP)
        payLoad["subject_id"] = payLoad["case_id"]
        payLoad["filename"] = payLoad["file-location"]
        return payLoad
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment