rdhyee · February 24, 2016 21:26
diff --git a/process_chunk_timing.txt b/process_chunk_timing.txt
 Line #      Hits         Time  Per Hit   % Time  Line Contents
 ==============================================================
   330                                               @profile
   331                                               def process_chunk(self, all_bytes):
   332                                                   '''Convert the structured ndarray `all_bytes` to the target_dtype
   333                                           
   334                                                   If you did not specify do_process_chunk, you might run this yourself on
   335                                                   chunks that you get from iteration.'''
   336                                                   # Note, this is slower than the code directly below
   337                                                   # records = recfunctions.append_fields(easy_converted, 'Time',
   338                                                   #                                      time64ish, usemask=False)
   339         1           15     15.0      0.0          target_dtype = np.dtype(self.bytes_spec.target_dtype)
   340         1           16     16.0      0.0          combined = np.empty(all_bytes.shape, dtype=target_dtype)
   341                                           
   342                                                   # This should perform type coercion as well
   343        21          125      6.0      0.0          for name in target_dtype.names:
   344        20           22      1.1      0.0              if name == 'Time':
   345         1            1      1.0      0.0                  continue
   346        19      3587575 188819.7     98.1              combined[name] = all_bytes[name]
   347                                           
   348                                                   # These don't have the decimal point in the TAQ file
   349         3           54     18.0      0.0          for dollar_col in ['Bid_Price', 'Ask_Price']:
   350         2        28956  14478.0      0.8              combined[dollar_col] /= 10000
   351                                           
   352                                                   # Currently, there doesn't seem to be any value in converting to
   353                                                   # numpy.datetime64, as PyTables wants float64's corresponding to the POSIX
   354                                                   # Standard (relative to 1970-01-01, UTC) that it then converts to a
   355                                                   # time64 struct on its own
   356                                           
   357                                                   # TODO This is the right math, but we still need to ensure we're
   358                                                   # coercing to sufficient data types (we need to make some tests!).
   359                                           
   360                                                   # The math is also probably a bit inefficient, but it seems to work,
   361                                                   # and based on Dav's testing, this is taking negligible time compared
   362                                                   # to the above conversions.
   363                                                   time64ish = (self.midnight_ts +
   364                                                                combined['hour'] * 3600 +
   365         1        21903  21903.0      0.6                       combined['minute'] * 60 +
   366                                                                # I'm particularly amazed that this seems to work (in py3)
   367         1        12433  12433.0      0.3                       combined['msec'] / 1000)
   368                                           
   369         1         7805   7805.0      0.2          combined['Time'] = time64ish
   370                                           
   371         1            2      2.0      0.0          return combined
	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	330 @profile
	331 def process_chunk(self, all_bytes):
	332 '''Convert the structured ndarray `all_bytes` to the target_dtype
	333
	334 If you did not specify do_process_chunk, you might run this yourself on
	335 chunks that you get from iteration.'''
	336 # Note, this is slower than the code directly below
	337 # records = recfunctions.append_fields(easy_converted, 'Time',
	338 # time64ish, usemask=False)
	339 1 15 15.0 0.0 target_dtype = np.dtype(self.bytes_spec.target_dtype)
	340 1 16 16.0 0.0 combined = np.empty(all_bytes.shape, dtype=target_dtype)
	341
	342 # This should perform type coercion as well
	343 21 125 6.0 0.0 for name in target_dtype.names:
	344 20 22 1.1 0.0 if name == 'Time':
	345 1 1 1.0 0.0 continue
	346 19 3587575 188819.7 98.1 combined[name] = all_bytes[name]
	347
	348 # These don't have the decimal point in the TAQ file
	349 3 54 18.0 0.0 for dollar_col in ['Bid_Price', 'Ask_Price']:
	350 2 28956 14478.0 0.8 combined[dollar_col] /= 10000
	351
	352 # Currently, there doesn't seem to be any value in converting to
	353 # numpy.datetime64, as PyTables wants float64's corresponding to the POSIX
	354 # Standard (relative to 1970-01-01, UTC) that it then converts to a
	355 # time64 struct on its own
	356
	357 # TODO This is the right math, but we still need to ensure we're
	358 # coercing to sufficient data types (we need to make some tests!).
	359
	360 # The math is also probably a bit inefficient, but it seems to work,
	361 # and based on Dav's testing, this is taking negligible time compared
	362 # to the above conversions.
	363 time64ish = (self.midnight_ts +
	364 combined['hour'] * 3600 +
	365 1 21903 21903.0 0.6 combined['minute'] * 60 +
	366 # I'm particularly amazed that this seems to work (in py3)
	367 1 12433 12433.0 0.3 combined['msec'] / 1000)
	368
	369 1 7805 7805.0 0.2 combined['Time'] = time64ish
	370
	371 1 2 2.0 0.0 return combined