adam704a · February 23, 2018 06:56
diff --git a/gistfile1.ipynb b/gistfile1.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1741506"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(1 for line in open('globe.csv'))\n",
    "# 870,753 = 1741506/2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1745425"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(1 for line in open('smart.csv'))\n",
    "# 872,712.5 = 1745425/2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "smart1 bad is 667413\n",
      "smart1 good is 203340\n",
      "globe1 bad is 549565\n",
      "globe1 good is 321188\n"
     ]
    }
   ],
   "source": [
    "smart1_bad = sum(1 for line in open('smart1_bad.csv'))\n",
    "smart1_good = sum(1 for line in open('smart1_good.csv'))\n",
    "globe1_bad = sum(1 for line in open('globe1_bad.csv'))\n",
    "globe1_good = sum(1 for line in open('globe1_good.csv'))\n",
    "print(\"smart1 bad is \"+ str(smart1_bad))\n",
    "print(\"smart1 good is \"+ str(smart1_good))\n",
    "print(\"globe1 bad is \"+ str(globe1_bad))\n",
    "print(\"globe1 good is \"+ str(globe1_good))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "smart is 1745425\n",
      "smart 1 is 872712\n",
      "smart 2 is 872713\n"
     ]
    }
   ],
   "source": [
    "smart = sum(1 for line in open('smart.csv'))\n",
    "smart1 = sum(1 for line in open('smart1.csv'))\n",
    "smart2 = sum(1 for line in open('smart2.csv'))\n",
    "\n",
    "print(\"smart is \"+ str(smart))\n",
    "print(\"smart 1 is \"+ str(smart1))\n",
    "print(\"smart 2 is \"+ str(smart2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1745425"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_lines\n",
    "# 1745425 = 872,712 + 872,713"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Good prefixes from https://docs.google.com/spreadsheets/d/1tlvcmpeFMN5ZHgim8TZkLTwTwhIvjbFY0ZUl7Lpabow/edit#gid=571114283\n",
    "\n",
    "good_globe=[\n",
    "63905,\n",
    "63915,\n",
    "63916,\n",
    "63926,\n",
    "63927,\n",
    "63997,\n",
    "63995]\n",
    "\n",
    "good_smart=[\n",
    "63908,\n",
    "63919,\n",
    "63920,\n",
    "63930,\n",
    "63939,\n",
    "63998,\n",
    "63999]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "true\n"
     ]
    }
   ],
   "source": [
    "\n",
    "mine = \"639080000704\"\n",
    "prefix = mine[:5]\n",
    "\n",
    "if int(prefix) in good_smart:\n",
    "    print(\"true\")\n",
    "else:\n",
    "    print(\"untrue\")    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Split Globe and filter on the good list"
   ]
  },
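  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv, itertools\n",
    "\n",
    "# First half of globe.csv (first 870753 rows): strip '+' and write each number to globe1_good.csv or globe1_bad.csv depending on whether its first five characters are in good_globe\n",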
    "with open('globe1_bad.csv', 'w') as bad_file:\n",
    "    with open('globe1_good.csv', 'w') as good_file:\n",
    "        with open('globe.csv') as origfile:\n",
    "            for row in itertools.islice(csv.reader(origfile), 870753):\n",
    "                #print row[0].strip('+')\n",
    "                #print(row[0])\n",
    "                clean = row[0].strip('+')\n",
    "                prefix = clean[:5]\n",
    "                if int(prefix) in good_globe:\n",
    "                    good_file.write(clean+'\\n')\n",
    "                else:\n",
    "                    bad_file.write(clean+'\\n')"
   ]
  },
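  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second half of globe.csv (skip the first 870753 rows): strip '+' and write each number to globe2_good.csv or globe2_bad.csv depending on whether its first five characters are in good_globe\n",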
    "with open('globe2_bad.csv', 'w') as bad_file:\n",
    "    with open('globe2_good.csv', 'w') as good_file:\n",
    "        with open('globe.csv') as origfile:\n",
    "            for row in itertools.islice(csv.reader(origfile), 870753, None):\n",
    "                #print row[0].strip('+')\n",
    "                #print(row[0])\n",
    "                clean = row[0].strip('+')\n",
    "                prefix = clean[:5]\n",
    "                if int(prefix) in good_globe:\n",
    "                    good_file.write(clean+'\\n')\n",
    "                else:\n",
    "                    bad_file.write(clean+'\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Regular Split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Regular split, part 1: copy the first column of the first 870753 rows of globe.csv to globe1.csv\n",
    "with open('globe1.csv', 'w') as newfile:\n",
    "    with open('globe.csv') as origfile:\n",
    "        for row in itertools.islice(csv.reader(origfile), 870753):\n",
    "            newfile.write(row[0]+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Regular split, part 2: copy the first column of the remaining rows of globe.csv (after the first 870753) to globe2.csv\n",
    "with open('globe2.csv', 'w') as newfile:\n",
    "    with open('globe.csv') as origfile:\n",
    "        for row in itertools.islice(csv.reader(origfile), 870753, None):\n",
    "            newfile.write(row[0]+'\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Split Smart and filter on the good list"
   ]
  },
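  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import csv, itertools\n",
    "\n",
    "# First half of smart.csv (first 872712 rows): strip '+' and write each number to smart1_good.csv or smart1_bad.csv depending on whether its first five characters are in good_smart\n",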
    "with open('smart1_bad.csv', 'w') as bad_file:\n",
    "    with open('smart1_good.csv', 'w') as good_file:\n",
    "        with open('smart.csv') as origfile:\n",
    "            for row in itertools.islice(csv.reader(origfile), 872712):\n",
    "                clean = row[0].strip('+')\n",
    "                prefix = clean[:5]\n",
    "                if int(prefix) in good_smart:\n",
    "                    good_file.write(clean+'\\n')\n",
    "                else:\n",
    "                    bad_file.write(clean+'\\n')"
   ]
  },
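  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second half of smart.csv (skip the first 872712 rows): strip '+' and write each number to smart2_good.csv or smart2_bad.csv depending on whether its first five characters are in good_smart\n",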
    "with open('smart2_bad.csv', 'w') as bad_file:\n",
    "    with open('smart2_good.csv', 'w') as good_file:\n",
    "        with open('smart.csv') as origfile:\n",
    "            for row in itertools.islice(csv.reader(origfile), 872712, None):\n",
    "                clean = row[0].strip('+')\n",
    "                prefix = clean[:5]\n",
    "                if int(prefix) in good_smart:\n",
    "                    good_file.write(clean+'\\n')\n",
    "                else:\n",
    "                    bad_file.write(clean+'\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Regular Split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import csv, itertools\n",
    "\n",
    "# Regular split, part 1: copy the first column of the first 872712 rows of smart.csv to smart1.csv\n",
    "with open('smart1.csv', 'w') as newfile:\n",
    "    with open('smart.csv') as origfile:\n",
    "        for row in itertools.islice(csv.reader(origfile), 872712):\n",
    "            #print(row[0])\n",
    "            newfile.write(row[0]+'\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\n",
    "# Regular split, part 2: copy the first column of the remaining rows of smart.csv (after the first 872712) to smart2.csv\n",
    "with open('smart2.csv', 'w') as newfile:\n",
    "    with open('smart.csv') as origfile:\n",
    "        for row in itertools.islice(csv.reader(origfile), 872712, None):\n",
    "            #print(row[0])\n",
    "            newfile.write(row[0]+'\\n')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1741506"
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sum(1 for line in open('globe.csv'))\n",
	"# 870,753 = 1741506/2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1745425"
	]
	},
	"execution_count": 30,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sum(1 for line in open('smart.csv'))\n",
	"# 872,712.5 = 1745425/2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"smart1 bad is 667413\n",
	"smart1 good is 203340\n",
	"globe1 bad is 549565\n",
	"globe1 good is 321188\n"
	]
	}
	],
	"source": [
	"smart1_bad = sum(1 for line in open('smart1_bad.csv'))\n",
	"smart1_good = sum(1 for line in open('smart1_good.csv'))\n",
	"globe1_bad = sum(1 for line in open('globe1_bad.csv'))\n",
	"globe1_good = sum(1 for line in open('globe1_good.csv'))\n",
	"print(\"smart1 bad is \"+ str(smart1_bad))\n",
	"print(\"smart1 good is \"+ str(smart1_good))\n",
	"print(\"globe1 bad is \"+ str(globe1_bad))\n",
	"print(\"globe1 good is \"+ str(globe1_good))\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 36,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"smart is 1745425\n",
	"smart 1 is 872712\n",
	"smart 2 is 872713\n"
	]
	}
	],
	"source": [
	"smart = sum(1 for line in open('smart.csv'))\n",
	"smart1 = sum(1 for line in open('smart1.csv'))\n",
	"smart2 = sum(1 for line in open('smart2.csv'))\n",
	"\n",
	"print(\"smart is \"+ str(smart))\n",
	"print(\"smart 1 is \"+ str(smart1))\n",
	"print(\"smart 2 is \"+ str(smart2))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1745425"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"num_lines\n",
	"# 1745425 = 872,712 + 872,713"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Good prefixes from https://docs.google.com/spreadsheets/d/1tlvcmpeFMN5ZHgim8TZkLTwTwhIvjbFY0ZUl7Lpabow/edit#gid=571114283\n",
	"\n",
	"good_globe=[\n",
	"63905,\n",
	"63915,\n",
	"63916,\n",
	"63926,\n",
	"63927,\n",
	"63997,\n",
	"63995]\n",
	"\n",
	"good_smart=[\n",
	"63908,\n",
	"63919,\n",
	"63920,\n",
	"63930,\n",
	"63939,\n",
	"63998,\n",
	"63999]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"true\n"
	]
	}
	],
	"source": [
	"\n",
	"mine = \"639080000704\"\n",
	"prefix = mine[:5]\n",
	"\n",
	"if int(prefix) in good_smart:\n",
	" print(\"true\")\n",
	"else:\n",
	" print(\"untrue\") "
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Split Globe and filter on the good list"
	]
	},
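	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [],
	"source": [
	"import csv, itertools\n",
	"\n",
	"# First half of globe.csv (first 870753 rows): strip '+' and write each number to globe1_good.csv or globe1_bad.csv depending on whether its first five characters are in good_globe\n",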
	"with open('globe1_bad.csv', 'w') as bad_file:\n",
	" with open('globe1_good.csv', 'w') as good_file:\n",
	" with open('globe.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 870753):\n",
	" #print row[0].strip('+')\n",
	" #print(row[0])\n",
	" clean = row[0].strip('+')\n",
	" prefix = clean[:5]\n",
	" if int(prefix) in good_globe:\n",
	" good_file.write(clean+'\\n')\n",
	" else:\n",
	" bad_file.write(clean+'\\n')"
	]
	},
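	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Second half of globe.csv (skip the first 870753 rows): strip '+' and write each number to globe2_good.csv or globe2_bad.csv depending on whether its first five characters are in good_globe\n",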
	"with open('globe2_bad.csv', 'w') as bad_file:\n",
	" with open('globe2_good.csv', 'w') as good_file:\n",
	" with open('globe.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 870753, None):\n",
	" #print row[0].strip('+')\n",
	" #print(row[0])\n",
	" clean = row[0].strip('+')\n",
	" prefix = clean[:5]\n",
	" if int(prefix) in good_globe:\n",
	" good_file.write(clean+'\\n')\n",
	" else:\n",
	" bad_file.write(clean+'\\n')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Regular Split"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Regular split, part 1: copy the first column of the first 870753 rows of globe.csv to globe1.csv\n",
	"with open('globe1.csv', 'w') as newfile:\n",
	" with open('globe.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 870753):\n",
	" newfile.write(row[0]+'\\n')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Regular split, part 2: copy the first column of the remaining rows of globe.csv (after the first 870753) to globe2.csv\n",
	"with open('globe2.csv', 'w') as newfile:\n",
	" with open('globe.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 870753, None):\n",
	" newfile.write(row[0]+'\\n')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Split Smart and filter on the good list"
	]
	},
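	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"import csv, itertools\n",
	"\n",
	"# First half of smart.csv (first 872712 rows): strip '+' and write each number to smart1_good.csv or smart1_bad.csv depending on whether its first five characters are in good_smart\n",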
	"with open('smart1_bad.csv', 'w') as bad_file:\n",
	" with open('smart1_good.csv', 'w') as good_file:\n",
	" with open('smart.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 872712):\n",
	" clean = row[0].strip('+')\n",
	" prefix = clean[:5]\n",
	" if int(prefix) in good_smart:\n",
	" good_file.write(clean+'\\n')\n",
	" else:\n",
	" bad_file.write(clean+'\\n')"
	]
	},
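	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Second half of smart.csv (skip the first 872712 rows): strip '+' and write each number to smart2_good.csv or smart2_bad.csv depending on whether its first five characters are in good_smart\n",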
	"with open('smart2_bad.csv', 'w') as bad_file:\n",
	" with open('smart2_good.csv', 'w') as good_file:\n",
	" with open('smart.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 872712, None):\n",
	" clean = row[0].strip('+')\n",
	" prefix = clean[:5]\n",
	" if int(prefix) in good_smart:\n",
	" good_file.write(clean+'\\n')\n",
	" else:\n",
	" bad_file.write(clean+'\\n')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Regular Split"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import csv, itertools\n",
	"\n",
	"# Regular split, part 1: copy the first column of the first 872712 rows of smart.csv to smart1.csv\n",
	"with open('smart1.csv', 'w') as newfile:\n",
	" with open('smart.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 872712):\n",
	" #print(row[0])\n",
	" newfile.write(row[0]+'\\n')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"\n",
	"# Regular split, part 2: copy the first column of the remaining rows of smart.csv (after the first 872712) to smart2.csv\n",
	"with open('smart2.csv', 'w') as newfile:\n",
	" with open('smart.csv') as origfile:\n",
	" for row in itertools.islice(csv.reader(origfile), 872712, None):\n",
	" #print(row[0])\n",
	" newfile.write(row[0]+'\\n')"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}