jtemporal · January 27, 2017 19:25
diff --git a/rosie-memory-error-DO b/rosie-memory-error-DO
 root@rosie-staging:~/rosie# docker run --rm -v /tmp/serenata-data:/tmp/serenata-data rosie
 2017-01-27 18:35:26 Creating the CSV file
 2017-01-27 18:35:26 Reading the XML file
 2017-01-27 18:35:27 Writing record #2,796 to the CSV
 2017-01-27 18:35:27 Done!
 2017-01-27 18:35:27 Creating the CSV file
 2017-01-27 18:35:27 Reading the XML file
 2017-01-27 18:37:41 Writing record #342,077 to the CSV
 2017-01-27 18:37:41 Done!
 2017-01-27 18:37:41 Creating the CSV file
 2017-01-27 18:37:41 Reading the XML file
 2017-01-27 18:53:36 Writing record #2,404,938 to the CSV
 2017-01-27 18:53:36 Done!
 Merging all datasets…
 Loading current-year.xz…
 Loading last-year.xz…
 Loading previous-years.xz…
 Traceback (most recent call last):
  File "rosie.py", line 36, in <module>
    command()
  File "rosie.py", line 23, in run
    rosie.main(target_directory)
  File "/rosie/__init__.py", line 64, in main
    dataset = Dataset(target_directory).get()
  File "/rosie/dataset.py", line 16, in get
    self.update_datasets()
  File "/rosie/dataset.py", line 30, in update_datasets
    ceap.clean()
  File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/ceap_dataset.py", line 47, in clean
    dataset = reimbursements.group(reimbursements.receipts)
  File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/reimbursements.py", line 46, in receipts
    return pd.concat(data)
  File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 1451, in concat
    copy=copy)
  File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 1481, in __init__
    objs = list(objs)
  File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/reimbursements.py", line 45, in <genexpr>
    data = (self.read_csv(name) for name in datasets)
  File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/reimbursements.py", line 39, in read_csv
    return pd.read_csv(filepath, dtype=dtype)
  File "/usr/local/lib/python3.5/site-packages/pandas/io/parsers.py", line 646, in parser_f
    return _read(filepath_or_buffer, kwds)
  File "/usr/local/lib/python3.5/site-packages/pandas/io/parsers.py", line 401, in _read
    data = parser.read()
  File "/usr/local/lib/python3.5/site-packages/pandas/io/parsers.py", line 957, in read
    df = DataFrame(col_dict, columns=columns, index=index)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/frame.py", line 266, in __init__
    mgr = self._init_dict(data, index, columns, dtype=dtype)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/frame.py", line 402, in _init_dict
    return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/frame.py", line 5408, in _arrays_to_mgr
    return create_block_manager_from_arrays(arrays, arr_names, axes)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4262, in create_block_manager_from_arrays
    blocks = form_blocks(arrays, names, axes)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4359, in form_blocks
    object_blocks = _simple_blockify(object_items, np.object_)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4389, in _simple_blockify
    values, placement = _stack_arrays(tuples, dtype)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4451, in _stack_arrays
    stacked = np.empty(shape, dtype=dtype)
 MemoryError
	root@rosie-staging:~/rosie# docker run --rm -v /tmp/serenata-data:/tmp/serenata-data rosie
	2017-01-27 18:35:26 Creating the CSV file
	2017-01-27 18:35:26 Reading the XML file
	2017-01-27 18:35:27 Writing record #2,796 to the CSV
	2017-01-27 18:35:27 Done!
	2017-01-27 18:35:27 Creating the CSV file
	2017-01-27 18:35:27 Reading the XML file
	2017-01-27 18:37:41 Writing record #342,077 to the CSV
	2017-01-27 18:37:41 Done!
	2017-01-27 18:37:41 Creating the CSV file
	2017-01-27 18:37:41 Reading the XML file
	2017-01-27 18:53:36 Writing record #2,404,938 to the CSV
	2017-01-27 18:53:36 Done!
	Merging all datasets…
	Loading current-year.xz…
	Loading last-year.xz…
	Loading previous-years.xz…
	Traceback (most recent call last):
	File "rosie.py", line 36, in <module>
	command()
	File "rosie.py", line 23, in run
	rosie.main(target_directory)
	File "/rosie/__init__.py", line 64, in main
	dataset = Dataset(target_directory).get()
	File "/rosie/dataset.py", line 16, in get
	self.update_datasets()
	File "/rosie/dataset.py", line 30, in update_datasets
	ceap.clean()
	File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/ceap_dataset.py", line 47, in clean
	dataset = reimbursements.group(reimbursements.receipts)
	File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/reimbursements.py", line 46, in receipts
	return pd.concat(data)
	File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 1451, in concat
	copy=copy)
	File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 1481, in __init__
	objs = list(objs)
	File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/reimbursements.py", line 45, in <genexpr>
	data = (self.read_csv(name) for name in datasets)
	File "/usr/local/lib/python3.5/site-packages/serenata_toolbox/reimbursements.py", line 39, in read_csv
	return pd.read_csv(filepath, dtype=dtype)
	File "/usr/local/lib/python3.5/site-packages/pandas/io/parsers.py", line 646, in parser_f
	return _read(filepath_or_buffer, kwds)
	File "/usr/local/lib/python3.5/site-packages/pandas/io/parsers.py", line 401, in _read
	data = parser.read()
	File "/usr/local/lib/python3.5/site-packages/pandas/io/parsers.py", line 957, in read
	df = DataFrame(col_dict, columns=columns, index=index)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/frame.py", line 266, in __init__
	mgr = self._init_dict(data, index, columns, dtype=dtype)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/frame.py", line 402, in _init_dict
	return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/frame.py", line 5408, in _arrays_to_mgr
	return create_block_manager_from_arrays(arrays, arr_names, axes)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4262, in create_block_manager_from_arrays
	blocks = form_blocks(arrays, names, axes)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4359, in form_blocks
	object_blocks = _simple_blockify(object_items, np.object_)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4389, in _simple_blockify
	values, placement = _stack_arrays(tuples, dtype)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4451, in _stack_arrays
	stacked = np.empty(shape, dtype=dtype)
	MemoryError
No results found