Skip to content

Instantly share code, notes, and snippets.

@atarkowska
Last active July 1, 2016 15:53
Show Gist options
  • Save atarkowska/f064d9a5d7e64b60bdb7e4528a2fca49 to your computer and use it in GitHub Desktop.
Save atarkowska/f064d9a5d7e64b60bdb7e4528a2fca49 to your computer and use it in GitHub Desktop.
Remove duplicates of MapAnnotations
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2016 University of Dundee.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Author: Aleksandra Tarkowska <A(dot)Tarkowska(at)dundee(dot)ac(dot)uk>,
#
# Version: 1.0
import omero, os, traceback
import omero.gateway
import time, datetime
from omero.rtypes import *
from omero.rtypes import unwrap
from omero.cmd import ERR, OK, Delete2
def waitOnCmd(client, handle, loops=1, ms=1000, passes=True):
    """
    Wait on an omero.cmd.HandlePrx to finish processing and then
    check that it passed or failed as expected. The callback is
    returned for accessing the Response and Status elements.

    :param client: client handed to omero.callbacks.CmdCallbackI
    :param handle: the handle to wait on
    :param loops: unused; kept so existing callers keep working
    :param ms: value passed to every callback.block() call
    :param passes: True if the response is expected to be an OK,
        False if it is expected to be anything else
    :return: the CmdCallbackI wrapping the handle
    :raises AssertionError: if the outcome does not match ``passes``;
        the message is the string form of the response
    """
    callback = omero.callbacks.CmdCallbackI(client, handle)
    # Keep blocking until the callback reports completion. This waits
    # without an upper bound, unlike callback.loop(loops, ms), which
    # throws on timeout.
    while not callback.block(ms):
        pass
    rsp = callback.getResponse()
    is_ok = isinstance(rsp, OK)
    # Raise explicitly instead of using an assert statement: asserts are
    # stripped under "python -O", which would let a failed command pass
    # silently.
    if passes != is_ok:
        raise AssertionError(str(rsp))
    return callback
# Remove duplicated annotations in the bulk_annotations namespace: every
# image link whose annotation has the same key/value content as an earlier
# annotation is re-pointed at that earlier one, then the annotations that
# are no longer linked are deleted.

# NOTE(review): server address and credentials are hard-coded; confirm them
# before running this against any other server.
host = "10.0.51.133"
port = 4064
rootpass = "ome"
user = "demo"

# Counts the links that the paged query below will return.
sqlcount = """
select count(ial.id)
from ImageAnnotationLink as ial
join ial.child a
where
a.ns = :ns
and a.details.owner.id = :id
"""

# Fetches the links together with their annotation, ordered by annotation
# id so that the annotation kept for each content is the lowest-id one.
sql = """
select ial
from ImageAnnotationLink as ial
join fetch ial.child a
where
a.ns = :ns
and a.details.owner.id = :id
order by a.id ASC
"""

c = omero.client(host=host, port=port)
s = c.createSession(user, rootpass)
try:
    query = s.getQueryService()
    update = s.getUpdateService()

    # Hard-coded owner id; the original left
    # s.getAdminService().getEventContext().userId as a commented-out
    # alternative.
    expId = 2
    p = omero.sys.ParametersI()
    p.addString("ns", "openmicroscopy.org/omero/bulk_annotations")
    p.addId(expId)

    rv = query.projection(sqlcount, p)
    rv = unwrap(rv)
    pagecount = rv[0][0]
    print("Map annotations count:  %s" % pagecount)

    # content key -> mapValue of the first annotation seen with that content
    maps_cache = dict()
    # content key -> number of redundant annotations found for that content
    duplicates = dict()
    # content key -> id of the first annotation seen with that content
    fist_found = dict()
    # content key -> ids of the images linked to that content
    images = dict()
    # ids of linked annotations that expose no mapValue
    empty_map = list()
    # links that have been re-pointed and must be saved
    links_to_update = []
    # ids of annotations that become orphaned once the links are saved
    maps_to_delete = []
    limit = 10000
    total_maps = 0

    # "+ 2" makes the 1-based range cover the final, partially filled page.
    for page in range(1, (pagecount // limit) + 2):
        offset = (page - 1) * limit
        print("PAGE %s %s %s" % (page, offset, limit))
        print("total_maps %s" % total_maps)
        print("links_to_update %s maps_to_delete %s" % (
            len(links_to_update), len(set(maps_to_delete))))
        p.page(offset, limit)
        t_res = query.findAllByQuery(sql, p)
        total_maps += len(t_res)
        for ial in t_res:
            if not hasattr(ial.child, 'mapValue'):
                empty_map.append(ial.child.id.val)
                continue

            child_id = ial.child.id.val
            image_id = ial.parent.id.val
            # Content key built from every name/value pair, in order. The
            # text is used as the dict key directly: encoding it is not
            # needed for hashing, and on Python 2 calling .encode() on a
            # byte string holding non-ASCII data raises UnicodeDecodeError.
            rhkey = "|".join(
                "%s: %s" % (nv.name, nv.value) for nv in ial.child.mapValue)

            if rhkey not in maps_cache:
                # First annotation with this content: it is the one kept.
                maps_cache[rhkey] = ial.child.mapValue
                duplicates[rhkey] = 0
                fist_found[rhkey] = child_id
                images[rhkey] = [image_id]
                continue

            images[rhkey].append(image_id)
            if child_id == fist_found[rhkey]:
                # Another link to the annotation that is being kept (e.g.
                # one annotation shared by several images, or a re-run of
                # this script). It must be neither re-pointed nor deleted.
                continue

            duplicates[rhkey] += 1
            maps_to_delete.append(child_id)
            # Unload both ends so that saving the link only changes which
            # annotation id it points at.
            ial.parent.unload()
            ial.child.unload()
            ial.child.id = rlong(fist_found[rhkey])
            links_to_update.append(ial)

    print("RESULT total_maps %s" % total_maps)
    print("%s %s %s %s %s" % (
        len(maps_cache), len(duplicates), len(fist_found), len(images),
        len(empty_map)))
    print("%s %s" % (len(links_to_update), len(set(maps_to_delete))))

    # Number of objects sent to the server per call.
    batch = 1000

    # update links pointing to the duplicates
    if links_to_update:
        for i in range(0, len(links_to_update), batch):
            update.saveArray(links_to_update[i:i + batch])
        print("UPDATED %s" % len(links_to_update))

    # delete unlinked maps; an annotation with several links was recorded
    # once per link, so collapse the ids before chunking to avoid
    # submitting the same id twice.
    ids_to_delete = sorted(set(maps_to_delete))
    if ids_to_delete:
        for i in range(0, len(ids_to_delete), batch):
            command = Delete2(
                targetObjects={"MapAnnotation": ids_to_delete[i:i + batch]})
            handle = c.sf.submit(command)
            waitOnCmd(c, handle)
        print("DELETED %s" % len(ids_to_delete))
finally:
    # Always release the session, including when a step above raised.
    s.closeOnDestroy()
    c.closeSession()
    c.__del__()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment