Last active
April 20, 2022 14:42
-
-
Save den-run-ai/2821ff59b02e9482be15d27f2bbd4451 to your computer and use it in GitHub Desktop.
Get the difference between two lists: benchmark of list/set comprehensions vs. numpy.setdiff1d vs. pandas Index.difference
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 12 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"maxsize=int(1e5)\n", | |
"l1=list(range(1,maxsize,3))+list(range(1,maxsize,5))\n", | |
"l2=list(range(0,maxsize,7))+list(range(1,maxsize,11))\n", | |
"\n", | |
"l11=np.array(l1)\n", | |
"l12=pd.Index(l11).astype('int32')\n", | |
"l21=np.array(l2)\n", | |
"l22=pd.Index(l21).astype('int32')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 21.5 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"comp1={_ for _ in l2 if _ not in l1}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 26.9 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"comp2=[_ for _ in l2 if _ not in l1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 30.8 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"comp3={_ for _ in l1 if _ not in l2}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 30.7 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"comp4=[_ for _ in l1 if _ not in l2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 8 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"numpy_diff1=np.setdiff1d(l11,l21)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 6 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"numpy_diff2=np.setdiff1d(l21,l11)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 11 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"pd_diff1=l12.difference(l22)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 6.5 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"pd_diff2=l22.difference(l12)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(11776, 12467, 36363, 41557)" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(comp1), len(comp2), len(comp3), len(comp4)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(36363, 11776)" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"numpy_diff1.size, numpy_diff2.size" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(36363, 11776)" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd_diff1.size, pd_diff2.size" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 1.01 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"maxsize=int(1e7)\n", | |
"l1=list(range(1,maxsize,3))+list(range(1,maxsize,5))\n", | |
"l2=list(range(0,maxsize,7))+list(range(1,maxsize,11))\n", | |
"\n", | |
"l11=np.array(l1)\n", | |
"l12=pd.Index(l11).astype('int32')\n", | |
"l21=np.array(l2)\n", | |
"l22=pd.Index(l21).astype('int32')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 693 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"numpy_diff1=np.setdiff1d(l11,l21)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 707 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"numpy_diff2=np.setdiff1d(l21,l11)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 1.71 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"pd_diff1=l12.difference(l22)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wall time: 1.27 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"pd_diff2=l22.difference(l12)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(3636362, 1177489)" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"numpy_diff1.size, numpy_diff2.size" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(3636362, 1177489)" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd_diff1.size, pd_diff2.size" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(dtype('int32'), dtype('int64'))" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"numpy_diff1.dtype, pd_diff1.dtype" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"INSTALLED VERSIONS\n", | |
"------------------\n", | |
"commit: None\n", | |
"python: 3.5.3.final.0\n", | |
"python-bits: 64\n", | |
"OS: Windows\n", | |
"OS-release: 10\n", | |
"machine: AMD64\n", | |
"processor: Intel64 Family 6 Model 78 Stepping 3, GenuineIntel\n", | |
"byteorder: little\n", | |
"LC_ALL: None\n", | |
"LANG: None\n", | |
"LOCALE: None.None\n", | |
"\n", | |
"pandas: 0.19.2\n", | |
"nose: 1.3.7\n", | |
"pip: 8.1.2\n", | |
"setuptools: 34.4.1\n", | |
"Cython: 0.24.1\n", | |
"numpy: 1.12.1\n", | |
"scipy: 0.19.0\n", | |
"statsmodels: 0.8.0\n", | |
"xarray: None\n", | |
"IPython: 5.3.0\n", | |
"sphinx: 1.4.6\n", | |
"patsy: 0.4.1\n", | |
"dateutil: 2.6.0\n", | |
"pytz: 2016.6.1\n", | |
"blosc: None\n", | |
"bottleneck: 1.2.0\n", | |
"tables: 3.2.2\n", | |
"numexpr: 2.6.2\n", | |
"matplotlib: 2.0.0\n", | |
"openpyxl: 2.3.2\n", | |
"xlrd: 1.0.0\n", | |
"xlwt: 1.1.2\n", | |
"xlsxwriter: 0.9.3\n", | |
"lxml: 3.7.2\n", | |
"bs4: 4.5.3\n", | |
"html5lib: 0.999999999\n", | |
"httplib2: None\n", | |
"apiclient: None\n", | |
"sqlalchemy: 1.0.13\n", | |
"pymysql: None\n", | |
"psycopg2: None\n", | |
"jinja2: 2.9.6\n", | |
"boto: 2.42.0\n", | |
"pandas_datareader: None\n" | |
] | |
} | |
], | |
"source": [ | |
"pd.show_versions()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [conda root]", | |
"language": "python", | |
"name": "conda-root-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.3" | |
}, | |
"latex_envs": { | |
"bibliofile": "biblio.bib", | |
"cite_by": "apalike", | |
"current_citInitial": 1, | |
"eqLabelWithNumbers": true, | |
"eqNumInitial": 0 | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Based on the Stack Overflow answer at http://stackoverflow.com/a/31881491/2230844