Skip to content

Instantly share code, notes, and snippets.

@den-run-ai
Last active April 20, 2022 14:42
Show Gist options
  • Save den-run-ai/2821ff59b02e9482be15d27f2bbd4451 to your computer and use it in GitHub Desktop.
Save den-run-ai/2821ff59b02e9482be15d27f2bbd4451 to your computer and use it in GitHub Desktop.
Get difference between two lists
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 12 ms\n"
]
}
],
"source": [
"%%time\n",
"# Small benchmark inputs: two integer lists that overlap and contain duplicates.\n",
"maxsize = 10**5\n",
"l1 = [*range(1, maxsize, 3), *range(1, maxsize, 5)]\n",
"l2 = [*range(0, maxsize, 7), *range(1, maxsize, 11)]\n",
"\n",
"# NumPy arrays built from the same data, used by np.setdiff1d below.\n",
"l11 = np.asarray(l1)\n",
"l21 = np.asarray(l2)\n",
"\n",
"# pandas Index objects built from those arrays, used by Index.difference below.\n",
"l12 = pd.Index(l11).astype('int32')\n",
"l22 = pd.Index(l21).astype('int32')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 21.5 s\n"
]
}
],
"source": [
"%%time\n",
"# Naive baseline (set result): every membership test is a linear scan of the list l1.\n",
"comp1 = {value for value in l2 if value not in l1}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 26.9 s\n"
]
}
],
"source": [
"%%time\n",
"# Same baseline collected into a list: order and duplicates of l2 are kept.\n",
"comp2 = [value for value in l2 if value not in l1]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 30.8 s\n"
]
}
],
"source": [
"%%time\n",
"# Opposite direction (set result): elements of l1 that never occur in l2.\n",
"comp3 = {value for value in l1 if value not in l2}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 30.7 s\n"
]
}
],
"source": [
"%%time\n",
"# Opposite direction collected into a list: order and duplicates of l1 are kept.\n",
"comp4 = [value for value in l1 if value not in l2]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 8 ms\n"
]
}
],
"source": [
"%%time\n",
"# NumPy set difference: values of l11 that are absent from l21.\n",
"# The inputs contain duplicates, so assume_unique stays at its default of False.\n",
"numpy_diff1 = np.setdiff1d(l11, l21, assume_unique=False)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 6 ms\n"
]
}
],
"source": [
"%%time\n",
"# NumPy set difference in the other direction: values of l21 that are absent from l11.\n",
"# The inputs contain duplicates, so assume_unique stays at its default of False.\n",
"numpy_diff2 = np.setdiff1d(l21, l11, assume_unique=False)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 11 ms\n"
]
}
],
"source": [
"%%time\n",
"# pandas set difference: entries of the Index l12 that are absent from l22.\n",
"pd_diff1 = l12.difference(l22)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 6.5 ms\n"
]
}
],
"source": [
"%%time\n",
"# pandas set difference in the other direction: entries of l22 that are absent from l12.\n",
"pd_diff2 = l22.difference(l12)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(11776, 12467, 36363, 41557)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Result sizes of the four comprehension baselines; the list variants keep duplicates.\n",
"tuple(map(len, (comp1, comp2, comp3, comp4)))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(36363, 11776)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Element counts of the two NumPy differences.\n",
"tuple(result.size for result in (numpy_diff1, numpy_diff2))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(36363, 11776)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Element counts of the two pandas differences.\n",
"tuple(result.size for result in (pd_diff1, pd_diff2))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 1.01 s\n"
]
}
],
"source": [
"%%time\n",
"# Scale the inputs up by 100x; this rebinds l1, l2, l11, l12, l21 and l22.\n",
"maxsize = 10**7\n",
"l1 = [*range(1, maxsize, 3), *range(1, maxsize, 5)]\n",
"l2 = [*range(0, maxsize, 7), *range(1, maxsize, 11)]\n",
"\n",
"# NumPy arrays built from the same data, used by np.setdiff1d below.\n",
"l11 = np.asarray(l1)\n",
"l21 = np.asarray(l2)\n",
"\n",
"# pandas Index objects built from those arrays, used by Index.difference below.\n",
"l12 = pd.Index(l11).astype('int32')\n",
"l22 = pd.Index(l21).astype('int32')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 693 ms\n"
]
}
],
"source": [
"%%time\n",
"# NumPy set difference on the larger inputs: values of l11 that are absent from l21.\n",
"# The inputs contain duplicates, so assume_unique stays at its default of False.\n",
"numpy_diff1 = np.setdiff1d(l11, l21, assume_unique=False)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 707 ms\n"
]
}
],
"source": [
"%%time\n",
"# NumPy set difference on the larger inputs: values of l21 that are absent from l11.\n",
"# The inputs contain duplicates, so assume_unique stays at its default of False.\n",
"numpy_diff2 = np.setdiff1d(l21, l11, assume_unique=False)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 1.71 s\n"
]
}
],
"source": [
"%%time\n",
"# pandas set difference on the larger inputs: entries of l12 that are absent from l22.\n",
"pd_diff1 = l12.difference(l22)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 1.27 s\n"
]
}
],
"source": [
"%%time\n",
"# pandas set difference on the larger inputs: entries of l22 that are absent from l12.\n",
"pd_diff2 = l22.difference(l12)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"(3636362, 1177489)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Element counts of the two NumPy differences on the larger inputs.\n",
"tuple(result.size for result in (numpy_diff1, numpy_diff2))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(3636362, 1177489)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Element counts of the two pandas differences on the larger inputs.\n",
"tuple(result.size for result in (pd_diff1, pd_diff2))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(dtype('int32'), dtype('int64'))"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compare the element dtype of the NumPy result with that of the pandas result.\n",
"tuple(result.dtype for result in (numpy_diff1, pd_diff1))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"INSTALLED VERSIONS\n",
"------------------\n",
"commit: None\n",
"python: 3.5.3.final.0\n",
"python-bits: 64\n",
"OS: Windows\n",
"OS-release: 10\n",
"machine: AMD64\n",
"processor: Intel64 Family 6 Model 78 Stepping 3, GenuineIntel\n",
"byteorder: little\n",
"LC_ALL: None\n",
"LANG: None\n",
"LOCALE: None.None\n",
"\n",
"pandas: 0.19.2\n",
"nose: 1.3.7\n",
"pip: 8.1.2\n",
"setuptools: 34.4.1\n",
"Cython: 0.24.1\n",
"numpy: 1.12.1\n",
"scipy: 0.19.0\n",
"statsmodels: 0.8.0\n",
"xarray: None\n",
"IPython: 5.3.0\n",
"sphinx: 1.4.6\n",
"patsy: 0.4.1\n",
"dateutil: 2.6.0\n",
"pytz: 2016.6.1\n",
"blosc: None\n",
"bottleneck: 1.2.0\n",
"tables: 3.2.2\n",
"numexpr: 2.6.2\n",
"matplotlib: 2.0.0\n",
"openpyxl: 2.3.2\n",
"xlrd: 1.0.0\n",
"xlwt: 1.1.2\n",
"xlsxwriter: 0.9.3\n",
"lxml: 3.7.2\n",
"bs4: 4.5.3\n",
"html5lib: 0.999999999\n",
"httplib2: None\n",
"apiclient: None\n",
"sqlalchemy: 1.0.13\n",
"pymysql: None\n",
"psycopg2: None\n",
"jinja2: 2.9.6\n",
"boto: 2.42.0\n",
"pandas_datareader: None\n"
]
}
],
"source": [
"# Print the interpreter, OS and library versions in use when this cell is run.\n",
"pd.show_versions(as_json=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
},
"latex_envs": {
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 0
}
},
"nbformat": 4,
"nbformat_minor": 1
}
@den-run-ai
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment