Skip to content

Instantly share code, notes, and snippets.

@amitkot
Last active December 18, 2015 18:09
Show Gist options
  • Save amitkot/5824002 to your computer and use it in GitHub Desktop.
Save amitkot/5824002 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "python notebook.ipynb"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Python - Notes from 16/6/2013\n",
"\n",
"# 1 - Introduction"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = 'hello'\n",
"print(a.upper())"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HELLO\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"help(print)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-5-3e4738e8def3>, line 1)",
"output_type": "pyerr",
"traceback": [
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-5-3e4738e8def3>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m help(print)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def a():\n",
" return 3\n",
"a()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 11,
"text": [
"3"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = 'hello'\n",
"#a.upper()\n",
"#len(a)\n",
"a.__len__()\n",
"help(a.upper)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Help on built-in function upper:\n",
"\n",
"upper(...)\n",
" S.upper() -> string\n",
" \n",
" Return a copy of the string S converted to uppercase.\n",
"\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2 - Fundamental Variables"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = 4\n",
"print(a)\n",
"a = None\n",
"print(a)\n",
"print(not_defined)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'not_defined' is not defined",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-20-7c69e067efe1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;32mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnot_defined\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'not_defined' is not defined"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"4\n",
"None\n"
]
}
],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = 'hello'\n",
"dir(a)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 21,
"text": [
"['__add__',\n",
" '__class__',\n",
" '__contains__',\n",
" '__delattr__',\n",
" '__doc__',\n",
" '__eq__',\n",
" '__format__',\n",
" '__ge__',\n",
" '__getattribute__',\n",
" '__getitem__',\n",
" '__getnewargs__',\n",
" '__getslice__',\n",
" '__gt__',\n",
" '__hash__',\n",
" '__init__',\n",
" '__le__',\n",
" '__len__',\n",
" '__lt__',\n",
" '__mod__',\n",
" '__mul__',\n",
" '__ne__',\n",
" '__new__',\n",
" '__reduce__',\n",
" '__reduce_ex__',\n",
" '__repr__',\n",
" '__rmod__',\n",
" '__rmul__',\n",
" '__setattr__',\n",
" '__sizeof__',\n",
" '__str__',\n",
" '__subclasshook__',\n",
" '_formatter_field_name_split',\n",
" '_formatter_parser',\n",
" 'capitalize',\n",
" 'center',\n",
" 'count',\n",
" 'decode',\n",
" 'encode',\n",
" 'endswith',\n",
" 'expandtabs',\n",
" 'find',\n",
" 'format',\n",
" 'index',\n",
" 'isalnum',\n",
" 'isalpha',\n",
" 'isdigit',\n",
" 'islower',\n",
" 'isspace',\n",
" 'istitle',\n",
" 'isupper',\n",
" 'join',\n",
" 'ljust',\n",
" 'lower',\n",
" 'lstrip',\n",
" 'partition',\n",
" 'replace',\n",
" 'rfind',\n",
" 'rindex',\n",
" 'rjust',\n",
" 'rpartition',\n",
" 'rsplit',\n",
" 'rstrip',\n",
" 'split',\n",
" 'splitlines',\n",
" 'startswith',\n",
" 'strip',\n",
" 'swapcase',\n",
" 'title',\n",
" 'translate',\n",
" 'upper',\n",
" 'zfill']"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"help(a.upper)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Help on built-in function upper:\n",
"\n",
"upper(...)\n",
" S.upper() -> string\n",
" \n",
" Return a copy of the string S converted to uppercase.\n",
"\n"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"34 + '34'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "unsupported operand type(s) for +: 'int' and 'str'",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-23-3786c30f3de2>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;36m34\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m'34'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'int' and 'str'"
]
}
],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = 34\n",
"'34' + str(a)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 25,
"text": [
"'3434'"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"0b010101"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 26,
"text": [
"21"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"l = [[1, 2, 3],\n",
" [4, 5, 6],\n",
" [7, 8, 9]]\n",
"l[0][0]\n",
"l[1][0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 28,
"text": [
"4"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import math\n",
"print(math.pi)\n",
"print(math.e)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"3.14159265359\n",
"2.71828182846\n"
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"3/5"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 31,
"text": [
"0"
]
}
],
"prompt_number": 31
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Some more examples\n",
"\n",
"3/5\n",
"Out[1]: 0\n",
"\n",
"3./5\n",
"Out[2]: 0.6\n",
"\n",
"3.0/5\n",
"Out[3]: 0.6\n",
"\n",
"## Accessing last result\n",
"a = _\n",
"\n",
"int(a)\n",
"Out[5]: 0\n",
"\n",
"## Accuracy not limited by how many digits we wrote after the . \n",
"1.0/3\n",
"Out[6]: 0.3333333333333333\n",
"\n",
"## Operators with different types\n",
"a = 34\n",
"\n",
"print(a + 'banana')\n",
"---------------------------------------------------------------------------\n",
"TypeError Traceback (most recent call last)\n",
"<ipython-input-8-d542a37c02f3> in <module>()\n",
"----> 1 print(a + 'banana')\n",
"\n",
"TypeError: unsupported operand type(s) for +: 'int' and 'str'\n",
"\n",
"print(str(a) + 'banana')\n",
"34banana\n",
"\n",
"a\n",
"Out[10]: 34\n",
"\n",
"type(a)\n",
"Out[11]: int\n",
"\n",
"## Python is 0-based (list indices start at 0!)\n",
"l = [1, 2, 3]\n",
"\n",
"l = [11, 22, 33]\n",
"\n",
"l[1]\n",
"Out[14]: 22\n",
"\n",
"l[0]\n",
"Out[15]: 11\n",
"\n",
"l[3]\n",
"---------------------------------------------------------------------------\n",
"IndexError Traceback (most recent call last)\n",
"<ipython-input-16-297534cddaa1> in <module>()\n",
"----> 1 l[3]\n",
"\n",
"IndexError: list index out of range\n",
"\n",
"len(l)\n",
"Out[17]: 3\n",
"\n",
"## Tuples are defined by using , and not ()\n",
"t = 1, 2, 3\n",
"\n",
"t1 = (1, 2, 3)\n",
"\n",
"t\n",
"Out[20]: (1, 2, 3)\n",
"\n",
"t1\n",
"Out[21]: (1, 2, 3)\n",
"\n",
"type(t)\n",
"Out[22]: tuple\n",
"\n",
"t = (1)\n",
"\n",
"t1 = (1, )\n",
"\n",
"t2 = 1,\n",
"\n",
"t\n",
"Out[26]: 1\n",
"\n",
"t1\n",
"Out[27]: (1,)\n",
"\n",
"t2\n",
"Out[28]: (1,)\n",
"\n",
"len(t1)\n",
"Out[29]: 1\n",
"\n",
"## Using tuple() and list()\n",
"t = tuple([3])\n",
"\n",
"t\n",
"Out[31]: (3,)\n",
"\n",
"l = list(1)\n",
"---------------------------------------------------------------------------\n",
"TypeError Traceback (most recent call last)\n",
"<ipython-input-32-2e0ebdf40242> in <module>()\n",
"----> 1 l = list(1)\n",
"\n",
"TypeError: 'int' object is not iterable\n",
"\n",
"l = list([1])\n",
"\n",
"l\n",
"Out[34]: [1]\n",
"\n",
"t =(,)\n",
" File \"<ipython-input-35-004da2bde009>\", line 1\n",
" t =(,)\n",
" ^\n",
"SyntaxError: invalid syntax\n",
"\n",
"\n",
"t = tuple()\n",
"\n",
"t\n",
"Out[37]: ()\n",
"\n",
"t = ,\n",
" File \"<ipython-input-38-4c149d329077>\", line 1\n",
" t = ,\n",
" ^\n",
"SyntaxError: invalid syntax\n",
"\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print('hello ' 'world')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"hello world\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = 'hello '\n",
"print(a 'world')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-2-2a4a63b0a0f0>, line 2)",
"output_type": "pyerr",
"traceback": [
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-2-2a4a63b0a0f0>\"\u001b[1;36m, line \u001b[1;32m2\u001b[0m\n\u001b[1;33m print(a 'world')\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print 'a', 'b'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"a b\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3 - Flow Control"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Any non-zero number is truthy, including negative ones.\n",
"if -1:\n",
"    print('indeed')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"indeed\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"7 is 7.0"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 5,
"text": [
"False"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"6.0 is 6.00"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 8,
"text": [
"True"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"range(0, 1, 1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 12,
"text": [
"[0]"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = [1, 2, 3, 4]\n",
"b = ['b']\n",
"zip(a, b)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 14,
"text": [
"[(1, 'b')]"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"if True:\n",
" None"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Strings\n",
"\n",
"Reminder: a backslash (`\\`) at the end of a line continues that line onto the next one, even inside a string literal."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print('This is a very long string that will span \\\n",
"two lines of code')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"This is a very long string that will span two lines of code\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Slice the string from the third character (index 2) to the end, taking every third character:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = 'abcdefghijklmnopqrstuvwxyz'\n",
"s[2::3]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 3,
"text": [
"'cfilorux'"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = set([1, 2, 3])\n",
"print(s)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"set([1, 2, 3])\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"set[0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'type' object has no attribute '__getitem__'",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-5-cab73aafaa40>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mset\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m: 'type' object has no attribute '__getitem__'"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s[0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "'set' object does not support indexing",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-6-4e98c4f87897>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0ms\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m: 'set' object does not support indexing"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sweet = {'jam', 'sugar', 'banana', 'mango'}"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"type(sweet)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 8,
"text": [
"set"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = set()\n",
"s.pop()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'pop from an empty set'",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-9-b1267329ee54>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0ms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m: 'pop from an empty set'"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s.remove?"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Regular Expressions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Back-references (`\\1`, `\\2`) with `re.search` - IPython session transcript:\n",
"\n",
"```\n",
"import re\n",
"\n",
"re.search(r'(aaa) \\1', 'helllo')\n",
"\n",
"re.search(r'(aaa) \\1', 'helllo').span()\n",
"---------------------------------------------------------------------------\n",
"AttributeError Traceback (most recent call last)\n",
"<ipython-input-69-a78170fce83d> in <module>()\n",
"----> 1 re.search(r'(aaa) \\1', 'helllo').span()\n",
"\n",
"AttributeError: 'NoneType' object has no attribute 'span'\n",
"\n",
"re.search(r'(aaa) \\1', 'aaa').span()\n",
"---------------------------------------------------------------------------\n",
"AttributeError Traceback (most recent call last)\n",
"<ipython-input-70-edf5972ac180> in <module>()\n",
"----> 1 re.search(r'(aaa) \\1', 'aaa').span()\n",
"\n",
"AttributeError: 'NoneType' object has no attribute 'span'\n",
"\n",
"re.search(r'(aaa)', 'aaa').span()\n",
"Out[71]: (0, 3)\n",
"\n",
"re.search(r'(aaa) \\1', 'aaa').span()\n",
"---------------------------------------------------------------------------\n",
"AttributeError Traceback (most recent call last)\n",
"<ipython-input-72-edf5972ac180> in <module>()\n",
"----> 1 re.search(r'(aaa) \\1', 'aaa').span()\n",
"\n",
"AttributeError: 'NoneType' object has no attribute 'span'\n",
"\n",
"re.search(r'(aaa) \\1', 'aaa aaa').span()\n",
"Out[73]: (0, 7)\n",
"\n",
"re.search(r'(aaa)\\1', 'aaa aaa').span()\n",
"---------------------------------------------------------------------------\n",
"AttributeError Traceback (most recent call last)\n",
"<ipython-input-74-9f333de65847> in <module>()\n",
"----> 1 re.search(r'(aaa)\\1', 'aaa aaa').span()\n",
"\n",
"AttributeError: 'NoneType' object has no attribute 'span'\n",
"\n",
"re.search(r'(aaa)\\1', 'aaaaaa').span()\n",
"Out[75]: (0, 6)\n",
"\n",
"re.search(r'(aaa) \\1', 'aaa aaa').span()\n",
"Out[76]: (0, 7)\n",
"\n",
"re.search(r'(.+) \\1', 'aaa aaa').span()\n",
"Out[77]: (0, 7)\n",
"\n",
"re.search(r'(.+) \\1', 'asa asa').span()\n",
"Out[78]: (0, 7)\n",
"\n",
"re.search(r'(.+) \\1', 'banana asa asa').span()\n",
"Out[79]: (5, 8)\n",
"\n",
"re.search(r'(.+) \\2', 'banana asa asa').span()\n",
"---------------------------------------------------------------------------\n",
"error Traceback (most recent call last)\n",
"<ipython-input-80-10e15f8331ad> in <module>()\n",
"----> 1 re.search(r'(.+) \\2', 'banana asa asa').span()\n",
"\n",
"C:\\Program Files (x86)\\Enthought\\Canopy32\\App\\appdata\\canopy-1.0.1.1189.win-x86\\lib\\re.pyc in search(pattern, string, flags)\n",
" 140 \"\"\"Scan through string looking for a match to the pattern, returning\n",
" 141 a match object, or None if no match was found.\"\"\"\n",
"--> 142 return _compile(pattern, flags).search(string)\n",
" 143 \n",
" 144 def sub(pattern, repl, string, count=0, flags=0):\n",
"\n",
"C:\\Program Files (x86)\\Enthought\\Canopy32\\App\\appdata\\canopy-1.0.1.1189.win-x86\\lib\\re.pyc in _compile(*key)\n",
" 240 p = sre_compile.compile(pattern, flags)\n",
" 241 except error, v:\n",
"--> 242 raise error, v # invalid expression\n",
" 243 if len(_cache) >= _MAXCACHE:\n",
" 244 _cache.clear()\n",
"\n",
"error: bogus escape: '\\\\2'\n",
"\n",
"re.search(r'(.+) (banana) \\2', 'banana asa asa').span()\n",
"---------------------------------------------------------------------------\n",
"AttributeError Traceback (most recent call last)\n",
"<ipython-input-81-851b09bf8749> in <module>()\n",
"----> 1 re.search(r'(.+) (banana) \\2', 'banana asa asa').span()\n",
"\n",
"AttributeError: 'NoneType' object has no attribute 'span'\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Possible groups here are 'a' and 'ana'. `re.search` returns the leftmost match and `\\w+` is greedy, so the captured group is the longer one ('ana')."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = 'ana ana'\n",
"re.search(r'(\\w+) \\1', s).group(1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 34,
"text": [
"'ana'"
]
}
],
"prompt_number": 34
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A group can be back-referenced more than once, which requires its text to repeat several times in the string:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"m = re.search(r'(.+) and aa(.*) and also \\2 \\1 \\1', 'Yellow and aaGreen and also Green Yellow Yellow')\n",
"print('first - {}, second - {}'.format(m.group(1), m.group(2)))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"first - Yellow, second - Green\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Some more grouping tricks:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = 'Superman is here twice: SupermanSupermanSuperman'\n",
"m = re.search(r'(.+) is here twice: \\1+', s)\n",
"print('first - {}'.format(m.group(1)))\n",
"print(m.span())\n",
"print(len(s))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"first - Superman\n",
"(0, 48)\n",
"48\n"
]
}
],
"prompt_number": 35
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can define groups without back-referencing them in the pattern and simply read them from the match object:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = 'Superman is here twice: SupermanSupermanSuperman'\n",
"m = re.search(r'.+ is (.+) (.+)', s)\n",
"print('first - {}, SECOND - {}'.format(m.group(1), m.group(2)))\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"first - here twice:, SECOND - SupermanSupermanSuperman\n"
]
}
],
"prompt_number": 36
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using groups to extract the prime minister's name from a given text:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s = 'These days the politics of israel is very interesting, with prime Minister Netanyahu everything is good'\n",
"m = re.search(r'Prime Minister (\\w+)', s, re.IGNORECASE)\n",
"print('first - {}'.format(m.group(1)))\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"first - Netanyahu\n"
]
}
],
"prompt_number": 37
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Iterating over match objects with finditer:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# re.search returns a match object for the first match only.\n",
"m = re.search(r'fun\\w*', 'having fun with the funny cat')\n",
"m.span()  # not displayed: only the cell's last expression is echoed\n",
"\n",
"# re.finditer yields one match object per non-overlapping match.\n",
"ms = re.finditer(r'fun\\w*', 'having fun with the funny cat')\n",
"for m in ms:\n",
"    print(m.span())\n",
"# re.findall returns the matched strings themselves.\n",
"re.findall(r'fun\\w*', 'having fun with the funny cat')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(7, 10)\n",
"(20, 25)\n"
]
},
{
"output_type": "pyout",
"prompt_number": 38,
"text": [
"['fun', 'funny']"
]
}
],
"prompt_number": 38
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Reminder: `re.search` returns only the first match:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"m = re.search(r'(fun\\w*) ', 'having fun with the funny cat')\n",
"m.group(1)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 39,
"text": [
"'fun'"
]
}
],
"prompt_number": 39
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Functions"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"s = 'this is nice'\n",
"# Replace every character with itself (\\1 is the captured character): the text is unchanged.\n",
"su = re.sub(r'(.)', r'\\1', s)\n",
"print(su)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"this is nice\n"
]
}
],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"s = 'this is nice'\n",
"# Wrap every character in parentheses; \\1 in the replacement is the captured character.\n",
"su = re.sub(r'(.)', r'(\\1)', s)\n",
"print(su)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(t)(h)(i)(s)( )(i)(s)( )(n)(i)(c)(e)\n"
]
}
],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"s = 'this is nice'\n",
"# The replacement can be a function: it receives each match object and returns the new text.\n",
"su = re.sub(r'(.)', lambda m: m.group(1).upper(), s)\n",
"print(su)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"THIS IS NICE\n"
]
}
],
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"s = 'this is nice'\n",
"# Same idea, but the function is called once per word (\\w+) instead of once per character.\n",
"su = re.sub(r'(\\w+)', lambda m: m.group(1).upper(), s)\n",
"print(su)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"THIS IS NICE\n"
]
}
],
"prompt_number": 44
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import re\n",
"s = 'this is nice'\n",
"# Replace each word with the (start, end) span of its match.\n",
"su = re.sub(r'(\\w+)', lambda m: str(m.span()), s)\n",
"print(su)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(0, 4) (5, 7) (8, 12)\n"
]
}
],
"prompt_number": 46
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Advanced Collections"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"map(lambda n: n**2, [1, 2, 3, 4, 5])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 50,
"text": [
"[1, 4, 9, 16, 25]"
]
}
],
"prompt_number": 50
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"map(lambda n: n**2, (1, 2, 3, 4, 5))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 52,
"text": [
"[1, 4, 9, 16, 25]"
]
}
],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"[i**2 for i in [1, 2, 3, 4, 5]]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 53,
"text": [
"[1, 4, 9, 16, 25]"
]
}
],
"prompt_number": 53
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import math\n",
"[math.ceil(i) for i in [0.1, 0.33, 4.5]]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 54,
"text": [
"[1.0, 1.0, 5.0]"
]
}
],
"prompt_number": 54
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"[i*3 for i in range(5)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 2,
"text": [
"[0, 3, 6, 9, 12]"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"(i*3 for i in range(5))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<generator object <genexpr> at 0x05EC8580>"
]
}
],
"prompt_number": 3
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment