jtemporal · January 30, 2017 14:22
diff --git a/gistfile1.txt b/gistfile1.txt
 root@rosie-staging:~/rosie# docker run --rm -v /tmp/serenata-data:/tmp/serenata-data rosie
 2017-01-30 13:34:50 Creating the CSV file
 2017-01-30 13:34:50 Reading the XML file
 2017-01-30 13:34:52 Writing record #3,200 to the CSV
 2017-01-30 13:34:52 Done!
 2017-01-30 13:34:52 Creating the CSV file
 2017-01-30 13:34:52 Reading the XML file
 2017-01-30 13:37:36 Writing record #342,225 to the CSV
 2017-01-30 13:37:36 Done!
 2017-01-30 13:37:36 Creating the CSV file
 2017-01-30 13:37:36 Reading the XML file
 2017-01-30 13:57:05 Writing record #2,404,847 to the CSV/rosie/dataset.py:52: SettingWithCopyWarning: 
 A value is trying to be set on a copy of a slice from a DataFrame.
 Try using .loc[row_indexer,col_indexer] = value instead

 See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  dataset['cnpj'] = dataset['cnpj'].str.replace(r'\D', '')
 2017-01-30 13:57:05 Writing record #2,404,938 to the CSV
 2017-01-30 13:57:05 Done!
 Merging all datasets…
 Loading current-year.xz…
 Loading last-year.xz…
 Loading previous-years.xz…
 Dropping rows without document_value or reimbursement_number…
 Grouping dataset by applicant_id, document_id and year…
 Gathering all reimbursement numbers together…
 Summing all net values together…
 Summing all reimbursement values together…
 Generating the new dataset…
 Casting changes to a new DataFrame…
 Writing it to file…
 Done.
 Traceback (most recent call last):
  File "rosie.py", line 36, in <module>
    command()
  File "rosie.py", line 23, in run
    rosie.main(target_directory)
  File "/rosie/__init__.py", line 64, in main
    dataset = Dataset(target_directory).get()
  File "/rosie/dataset.py", line 22, in get
    right_on='cnpj')
  File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 62, in merge
    return op.get_result()
  File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 564, in get_result
    concat_axis=0, copy=self.copy)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4825, in concatenate_block_managers
    placement=placement) for placement, join_units in concat_plan]
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4825, in <listcomp>
    placement=placement) for placement, join_units in concat_plan]
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4922, in concatenate_join_units
    for ju in join_units]
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4922, in <listcomp>
    for ju in join_units]
  File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 5222, in get_reindexed_values
    fill_value=fill_value)
  File "/usr/local/lib/python3.5/site-packages/pandas/core/algorithms.py", line 1100, in take_nd
    out = np.empty(out_shape, dtype=dtype)
 MemoryError
	root@rosie-staging:~/rosie# docker run --rm -v /tmp/serenata-data:/tmp/serenata-data rosie
	2017-01-30 13:34:50 Creating the CSV file
	2017-01-30 13:34:50 Reading the XML file
	2017-01-30 13:34:52 Writing record #3,200 to the CSV
	2017-01-30 13:34:52 Done!
	2017-01-30 13:34:52 Creating the CSV file
	2017-01-30 13:34:52 Reading the XML file
	2017-01-30 13:37:36 Writing record #342,225 to the CSV
	2017-01-30 13:37:36 Done!
	2017-01-30 13:37:36 Creating the CSV file
	2017-01-30 13:37:36 Reading the XML file
	2017-01-30 13:57:05 Writing record #2,404,847 to the CSV/rosie/dataset.py:52: SettingWithCopyWarning:
	A value is trying to be set on a copy of a slice from a DataFrame.
	Try using .loc[row_indexer,col_indexer] = value instead

	See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
	dataset['cnpj'] = dataset['cnpj'].str.replace(r'\D', '')
	2017-01-30 13:57:05 Writing record #2,404,938 to the CSV
	2017-01-30 13:57:05 Done!
	Merging all datasets…
	Loading current-year.xz…
	Loading last-year.xz…
	Loading previous-years.xz…
	Dropping rows without document_value or reimbursement_number…
	Grouping dataset by applicant_id, document_id and year…
	Gathering all reimbursement numbers together…
	Summing all net values together…
	Summing all reimbursement values together…
	Generating the new dataset…
	Casting changes to a new DataFrame…
	Writing it to file…
	Done.
	Traceback (most recent call last):
	File "rosie.py", line 36, in <module>
	command()
	File "rosie.py", line 23, in run
	rosie.main(target_directory)
	File "/rosie/__init__.py", line 64, in main
	dataset = Dataset(target_directory).get()
	File "/rosie/dataset.py", line 22, in get
	right_on='cnpj')
	File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 62, in merge
	return op.get_result()
	File "/usr/local/lib/python3.5/site-packages/pandas/tools/merge.py", line 564, in get_result
	concat_axis=0, copy=self.copy)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4825, in concatenate_block_managers
	placement=placement) for placement, join_units in concat_plan]
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4825, in <listcomp>
	placement=placement) for placement, join_units in concat_plan]
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4922, in concatenate_join_units
	for ju in join_units]
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 4922, in <listcomp>
	for ju in join_units]
	File "/usr/local/lib/python3.5/site-packages/pandas/core/internals.py", line 5222, in get_reindexed_values
	fill_value=fill_value)
	File "/usr/local/lib/python3.5/site-packages/pandas/core/algorithms.py", line 1100, in take_nd
	out = np.empty(out_shape, dtype=dtype)
	MemoryError