Last active
August 13, 2017 04:16
-
-
Save dboyliao/16003486079580bca13b5322777b8284 to your computer and use it in GitHub Desktop.
Simple demo code for fast array replacement (with performance comparison)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| from __future__ import print_function | |
| import argparse | |
| import numpy as np | |
| import time | |
# Benchmark input: a random sequence of 5000 single-letter labels drawn
# (with replacement) from 'a'..'d'.
arr = np.random.choice(['a', 'b', 'c', 'd'], size=5000, replace=True)

# Labels that both implementations map to 'positive'; all others become
# 'negative'.
positive_values = ['a', 'b']
def _mean_exec_time(impl, repeat_times):
    """Return the mean duration, in seconds, of ``impl(arr, positive_values)``.

    Args:
        impl: callable taking ``(arr, pos_values)``.
        repeat_times: number of timed calls to average over.
    """
    # default_timer is the clock the stdlib itself uses for benchmarking and
    # is available on both Python 2 and Python 3.
    from timeit import default_timer

    durations = []
    for _ in range(repeat_times):
        tic = default_timer()
        impl(arr, positive_values)
        toc = default_timer()
        durations.append(toc - tic)
    return np.mean(durations)


def main(repeat_times=10):
    """Check that both implementations agree, then print their mean run times.

    Args:
        repeat_times: how many times each implementation is timed.

    Returns:
        0, always.
    """
    result_naive = np.array(naive_impl(arr, positive_values))
    result_np = np_impl(arr, positive_values)
    # np.alltrue was removed in NumPy 2.0; array_equal performs the same
    # element-wise comparison and reduction.
    print("produce same results: {}".format(
        np.array_equal(result_np, result_naive)))

    print("mean execution time for naive_impl: {} sec".format(
        _mean_exec_time(naive_impl, repeat_times)))
    print("mean execution time for np_impl: {} sec".format(
        _mean_exec_time(np_impl, repeat_times)))
    return 0
def naive_impl(arr, pos_values):
    """Label every element of ``arr`` using a pure-Python pass.

    Args:
        arr: iterable of values to classify.
        pos_values: container of values that count as positive.

    Returns:
        A list with ``'positive'`` for each element found in ``pos_values``
        and ``'negative'`` for every other element, in input order.
    """
    return ['positive' if value in pos_values else 'negative'
            for value in arr]
def np_impl(arr, pos_values):
    """Label every element of ``arr`` using vectorized NumPy operations.

    Args:
        arr: array-like of values to classify.
        pos_values: array-like of values that count as positive.

    Returns:
        A NumPy array of strings holding ``'positive'`` where the element
        is in ``pos_values`` and ``'negative'`` elsewhere.
    """
    # Boolean mask: True wherever the element is one of pos_values.
    is_positive = np.isin(arr, pos_values)
    # The one-element lists broadcast against the mask.
    return np.where(is_positive, ['positive'], ['negative'])
if __name__ == "__main__":
    # Command-line entry point: -n / --repeat-times sets how many timed runs
    # each implementation gets (default 10).
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--repeat-times", dest="repeat_times",
                        help="repeat times for profiling",
                        type=int, default=10)
    cli_args = parser.parse_args()
    main(repeat_times=cli_args.repeat_times)
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If we make `arr` in line 8 a list, there will be no significant performance boost in either implementation. Here we can see that a `numpy` array is a good substitute for `list` if you are handling homogeneous arrays.