Skip to content

Instantly share code, notes, and snippets.

View eightysteele's full-sized avatar

Aaron Steele eightysteele

View GitHub Profile
[ERROR ] [WorkerThread-0] WorkerThread:
Traceback (most recent call last):
File "/home/eighty/Dropbox/projects/lib/python/google_appengine/google/appengine/tools/adaptive_thread_pool.py", line 176, in WorkOnItems
status, instruction = item.PerformWork(self.__thread_pool)
File "/home/eighty/Dropbox/projects/lib/python/google_appengine/google/appengine/tools/bulkloader.py", line 764, in PerformWork
transfer_time = self._TransferItem(thread_pool)
File "/home/eighty/Dropbox/projects/lib/python/google_appengine/google/appengine/tools/bulkloader.py", line 933, in _TransferItem
self.content = self.request_manager.EncodeContent(self.rows)
File "/home/eighty/Dropbox/projects/lib/python/google_appengine/google/appengine/tools/bulkloader.py", line 1392, in EncodeContent
entity = loader.create_entity(values, key_name=key, parent=parent)
simplejson.dumps(dict(a=1,b=2))
'{"a": 1, "b": 2}'
[dict(cellkey=k, doc=simplejson.dumps(cells[k])) for k in cells.keys()]
cells = dict((k, simplejson.dumps(v)) for k,v in cells.iteritems())
@classmethod
def create(cls, xmin, ymax, xmax, ymin):
    """Alternate constructor that takes four scalar values instead of Points.

    Pairs the arguments into two ``Point`` objects -- ``Point(xmin, ymax)``
    and ``Point(xmax, ymin)`` -- and passes them, in that order, to the
    class constructor.

    NOTE(review): presumably the two points are opposite corners of a
    bounding box; confirm against ``Point`` and this class's ``__init__``.

    Arguments:
        xmin - first argument of the first Point
        ymax - second argument of the first Point
        xmax - first argument of the second Point
        ymin - second argument of the second Point

    Returns a new instance of ``cls``.
    """
    first_point = Point(xmin, ymax)
    second_point = Point(xmax, ymin)
    return cls(first_point, second_point)
class Cell(model.Model):
"""Models a CouchDB cell document.
key_name - The cell key (e.g., 1-2).
"""
rev = model.StringProperty('r')
coords = model.StringProperty('c')
varvals = model.TextProperty('v')
def __eq__(self, other):
$ fdisk -l
WARNING: GPT (GUID Partition Table) detected on '/dev/sda'! The util fdisk doesn't support GPT. Use GNU Parted.
Disk /dev/sda: 250.1 GB, 250059350016 bytes
255 heads, 63 sectors/track, 30401 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
DWC_ALIAS = dict(
acceptedNameUsage='anu',
acceptedNameUsageID='anuid',
accessRights='ar',
associatedMedia='am',
associatedOccurrences='ao',
associatedReferences='ar',
associatedSequences='as',
associatedTaxa='at',
basisOfRecord='br',
# Stop words to exclude from the index
STOP_WORDS = [
'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am',
'among', 'an', 'and', 'any', 'are', 'as', 'at', 'be', 'because', 'been',
'but', 'by', 'can', 'cannot', 'could', 'dear', 'did', 'do', 'does', 'either',
'else', 'ever', 'every', 'for', 'from', 'get', 'got', 'had', 'has', 'have',
'he', 'her', 'hers', 'him', 'his', 'how', 'however', 'i', 'if', 'in', 'into',
'is', 'it', 'its', 'just', 'least', 'let', 'like', 'likely', 'may', 'me',
'might', 'most', 'must', 'my', 'neither', 'no', 'nor', 'not', 'of', 'off',
'often', 'on', 'only', 'or', 'other', 'our', 'own', 'rather', 'said', 'say',
def get_corpus_list():
def wrapper(value, bulkload_state):
"""Returns list of unique words in the entire record.
Arguments:
value - the JSON encoded record
"""
d = bulkload_state.current_dictionary
recjson = simplejson.loads(value)
d.update(recjson)