Skip to content

Instantly share code, notes, and snippets.

@adam704a
Created January 18, 2018 14:14
Show Gist options
  • Save adam704a/c43a44dc83aeaa3eed9e1f6df280d1a8 to your computer and use it in GitHub Desktop.
Save adam704a/c43a44dc83aeaa3eed9e1f6df280d1a8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1741506"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the lines in globe.csv; the context manager closes the file handle\n",
"# instead of leaving it open until garbage collection.\n",
"with open('globe.csv') as globe_file:\n",
"    globe_line_count = sum(1 for _ in globe_file)\n",
"\n",
"# Half of the total is the split point: 870,753 = 1741506/2\n",
"globe_line_count"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"globe 1 is 870753\n",
"globe 2 is 870753\n"
]
}
],
"source": [
"# Line counts of the original file and its two halves, to check the split.\n",
"line_counts = []\n",
"for path in ('globe.csv', 'globe1.csv', 'globe2.csv'):\n",
"    # 'with' closes each handle as soon as its lines have been counted.\n",
"    with open(path) as csv_file:\n",
"        line_counts.append(sum(1 for _ in csv_file))\n",
"globe, globe1, globe2 = line_counts\n",
"\n",
"print(\"globe is \"+ str(globe))\n",
"print(\"globe 1 is \"+ str(globe1))\n",
"print(\"globe 2 is \"+ str(globe2))"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"smart is 1745425\n",
"smart 1 is 872712\n",
"smart 2 is 872713\n"
]
}
],
"source": [
"# Line counts of the original file and its two halves, to check the split.\n",
"line_counts = []\n",
"for path in ('smart.csv', 'smart1.csv', 'smart2.csv'):\n",
"    # 'with' closes each handle as soon as its lines have been counted.\n",
"    with open(path) as csv_file:\n",
"        line_counts.append(sum(1 for _ in csv_file))\n",
"smart, smart1, smart2 = line_counts\n",
"\n",
"print(\"smart is \"+ str(smart))\n",
"print(\"smart 1 is \"+ str(smart1))\n",
"print(\"smart 2 is \"+ str(smart2))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1745425"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# num_lines was previously only defined in leftover kernel state; compute it\n",
"# here so the cell works on a fresh kernel.\n",
"# NOTE(review): the recorded value 1745425 matches the smart.csv line count,\n",
"# so smart.csv is assumed to be the file that was counted -- confirm.\n",
"with open('smart.csv') as smart_file:\n",
"    num_lines = sum(1 for _ in smart_file)\n",
"\n",
"# 1745425 = 872,712 + 872,713\n",
"num_lines"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv\n",
"import itertools\n",
"\n",
"# Preview the first 100 records of globe.csv.\n",
"# NOTE(review): assumes the file has a header row containing 'first_name'\n",
"# and 'last_name' columns -- confirm against the actual data.\n",
"# newline='' lets the csv module handle line endings itself.\n",
"with open('globe.csv', 'r', newline='') as csvfile:\n",
"    for row in itertools.islice(csv.DictReader(csvfile), 100):\n",
"        print(row['first_name'], row['last_name'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Split Globe"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import csv\n",
"import itertools\n",
"\n",
"# Write the first 870753 rows of globe.csv (870753 = 1741506/2) to globe1.csv.\n",
"# Only the first column of each row is written, one value per line.\n",
"# newline='' on the input lets the csv module handle line endings itself.\n",
"with open('globe1.csv', 'w') as newfile, open('globe.csv', newline='') as origfile:\n",
"    for row in itertools.islice(csv.reader(origfile), 870753):\n",
"        newfile.write(row[0]+'\\n')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write every row of globe.csv after the first 870753 to globe2.csv.\n",
"# Only the first column of each row is written, one value per line.\n",
"# Uses the csv and itertools modules imported in an earlier cell.\n",
"# newline='' on the input lets the csv module handle line endings itself.\n",
"with open('globe2.csv', 'w') as newfile, open('globe.csv', newline='') as origfile:\n",
"    for row in itertools.islice(csv.reader(origfile), 870753, None):\n",
"        newfile.write(row[0]+'\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Split Smart"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import csv\n",
"import itertools\n",
"\n",
"# Write the first 872712 rows of smart.csv to smart1.csv; smart.csv has an\n",
"# odd number of lines (1745425), so the remainder file is one row longer.\n",
"# Only the first column of each row is written, one value per line.\n",
"# newline='' on the input lets the csv module handle line endings itself.\n",
"with open('smart1.csv', 'w') as newfile, open('smart.csv', newline='') as origfile:\n",
"    for row in itertools.islice(csv.reader(origfile), 872712):\n",
"        newfile.write(row[0]+'\\n')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv\n",
"import itertools\n",
"\n",
"# Write every row of smart.csv after the first 872712 to smart2.csv.\n",
"# Only the first column of each row is written, one value per line.\n",
"# newline='' on the input lets the csv module handle line endings itself.\n",
"with open('smart2.csv', 'w') as newfile, open('smart.csv', newline='') as origfile:\n",
"    for row in itertools.islice(csv.reader(origfile), 872712, None):\n",
"        newfile.write(row[0]+'\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment