Skip to content

Instantly share code, notes, and snippets.

@siwells
Created October 13, 2011 16:11
Show Gist options
  • Select an option

  • Save siwells/1284653 to your computer and use it in GitHub Desktop.

Select an option

Save siwells/1284653 to your computer and use it in GitHub Desktop.
Beginnings of a data generation suite for OMERO/HIC datasets & schemas
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import AttributesNSImpl
import hashlib
class SchemaGen:
    """Write a synthetic project-anonymisation XML document.

    The document consists of one ``project_anonymisation`` root element
    carrying the project id, followed by one empty ``map`` child per
    generated row.  Each ``map`` pairs a generated ``prochi`` identifier
    with the MD5 hex digest of that identifier (``anochihash``).
    """

    def __init__(self, output, encoding, projectID, initial_prochi):
        """Create the generator and immediately emit the XML declaration.

        output         -- writable file-like object the XML is written to.
        encoding       -- encoding name handed to ``XMLGenerator``.
        projectID      -- string used as the root ``id`` attribute and as
                          the prefix of every generated prochi.
        initial_prochi -- integer counter; it is incremented *before* each
                          use, so the first prochi ends in
                          ``initial_prochi + 1``.
        """
        anonschema = XMLGenerator(output, encoding)
        anonschema.startDocument()
        self._anonschema = anonschema
        self._output = output
        self._encoding = encoding
        self._projectID = projectID
        self._initial_prochi = initial_prochi

    @staticmethod
    def _make_attrs(pairs):
        """Build a namespace-less ``AttributesNSImpl`` from (name, value) pairs.

        The qname table maps each ``(None, local_name)`` key to the
        attribute's qualified name (its local name, since no namespace is
        used), not to the attribute's value.
        """
        attr_vals = {}
        attr_qnames = {}
        for local_name, value in pairs:
            key = (None, local_name)
            attr_vals[key] = value
            attr_qnames[key] = local_name
        return AttributesNSImpl(attr_vals, attr_qnames)

    def writeFooter(self):
        """Close the root ``project_anonymisation`` element and the document."""
        self._anonschema.endElementNS(
            (None, u'project_anonymisation'), u'project_anonymisation')
        self._anonschema.endDocument()

    def writeHeader(self):
        """Open the root ``project_anonymisation`` element with its ``id``."""
        attrs = self._make_attrs([(u'id', self._projectID)])
        self._anonschema.startElementNS(
            (None, u'project_anonymisation'), u'project_anonymisation', attrs)

    def writeRow(self):
        """Emit one empty ``map`` element for a freshly generated prochi."""
        prochi = self.generate_prochi()
        attrs = self._make_attrs([
            (u'anochihash', self.generate_anochihash(prochi)),
            (u'prochi', prochi),
        ])
        self._anonschema.startElementNS((None, u'map'), u'map', attrs)
        self._anonschema.endElementNS((None, u'map'), u'map')

    def generate(self, numRows):
        """Write the complete document: header, ``numRows`` rows, footer."""
        self.writeHeader()
        for _ in range(numRows):
            self.writeRow()
        self.writeFooter()

    def generate_prochi(self):
        """Advance the counter and return ``projectID`` + the new counter."""
        self._initial_prochi += 1
        return self._projectID + str(self._initial_prochi)

    def generate_anochihash(self, anochi):
        """Return the MD5 hex digest of ``anochi``.

        ``hashlib`` only accepts bytes on Python 3, so text input is
        encoded as UTF-8 first; input that is already bytes is hashed
        unchanged.
        """
        if not isinstance(anochi, bytes):
            anochi = anochi.encode('utf-8')
        return hashlib.md5(anochi).hexdigest()
class DataSuite:
    """Generate a family of anonymisation XML files of increasing size."""

    def __init__(self):
        """Set the row counts for which an output file is produced."""
        self._range = [10, 100, 1000, 10000]

    def build(self, schematype):
        """Write one ``<schematype>_<size>.xml`` file per configured size.

        schematype -- string used as the file-name prefix.

        File names are relative, so files land in the current working
        directory.  Every file is generated with project id ``'AAB'`` and
        an initial prochi counter of 1000000.
        """
        for size in self._range:
            outname = schematype + '_' + str(size) + '.xml'
            # The with-block closes the file even if generation fails
            # part-way, so a failed run does not leak the handle.
            with open(outname, 'w') as outfile:
                gen = SchemaGen(outfile, 'utf-8', 'AAB', 1000000)
                gen.generate(size)
if __name__ == "__main__":
    # Script entry point: produce the 'anon' family of sample files.
    DataSuite().build('anon')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment