Created
August 19, 2013 05:45
-
-
Save astynax/6266033 to your computer and use it in GitHub Desktop.
Notebook для м/к по регуляркам
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# \u0420\u0435\u0433\u0443\u043b\u044f\u0440\u043d\u044b\u0435 \u0432\u044b\u0440\u0430\u0436\u0435\u043d\u0438\u044f\n", | |
"## \u0418\u0442\u0430\u043a:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import re" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"\u0420\u0435\u0433\u0443\u043b\u044f\u0440\u043a\u0438 \u0441\u043e\u0441\u0442\u043e\u044f\u0442 \u0438\u0437\n", | |
"\n", | |
"- *\u043b\u044e\u0431\u043e\u0433\u043e \u0441\u0438\u043c\u0432\u043e\u043b\u0430*\n", | |
"- \u043a\u043b\u0430\u0441\u0441\u043e\u0432 \u0441\u0438\u043c\u0432\u043e\u043b\u043e\u0432\n", | |
"- \u043a\u0432\u0430\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432\n", | |
"- *\u044f\u043a\u043e\u0440\u0435\u0439*\n", | |
"- \u0441\u0438\u043c\u0432\u043e\u043b\u043e\u0432 \u0433\u0440\u0443\u043f\u043f\u0438\u0440\u043e\u0432\u043a\u0438\n", | |
"- \u043f\u0435\u0440\u0435\u0447\u0438\u0441\u043b\u0435\u043d\u0438\u0439\n", | |
"- \u043a\u043e\u043c\u043c\u0435\u043d\u0442\u0430\u0440\u0438\u0435\u0432\n", | |
"- \u043c\u043e\u0434\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432\n", | |
"- *\u0437\u0430\u0433\u043b\u044f\u0434\u044b\u0432\u0430\u043d\u0438\u0439* \u0432\u043f\u0435\u0440\u0435\u0434/\u043d\u0430\u0437\u0430\u0434" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u0421\u0438\u043c\u0432\u043e\u043b\u044b: \u043b\u044e\u0431\u043e\u0439 \u0438 \u043a\u043b\u0430\u0441\u0441\u044b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def m(r, s):\n", | |
" match = re.match(r, s)\n", | |
" if match:\n", | |
" return match.group()\n", | |
" return \"Nope!\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# . \\d \\D \\s \\S \\w \\W [^...-...]\n", | |
"l = ['a', 'B', ' ', '\\t', '1', '0']\n", | |
"def select(r, xs):\n", | |
" return [m.group() for m in (re.match(r, x) for x in xs) if m]\n", | |
"print select('\\d', l), '\u0446\u0438\u0444\u0440\u044b'\n", | |
"print select('\\D', l), '\u041d\u0415 \u0446\u0438\u0444\u0440\u044b'\n", | |
"print select('\\s', l), '\u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'\n", | |
"print select('\\S', l), '\u041d\u0415 \u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'\n", | |
"print select('\\w', l), '\u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'\n", | |
"print select('\\W', l), '\u041d\u0415 \u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"['1', '0'] \u0446\u0438\u0444\u0440\u044b\n", | |
"['a', 'B', ' ', '\\t'] \u041d\u0415 \u0446\u0438\u0444\u0440\u044b\n", | |
"[' ', '\\t'] \u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n", | |
"['a', 'B', '1', '0'] \u041d\u0415 \u043f\u0440\u043e\u0431\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n", | |
"['a', 'B', '1', '0'] \u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n", | |
"[' ', '\\t'] \u041d\u0415 \u0431\u0443\u043a\u0432\u0435\u043d\u043d\u043e-\u0446\u0438\u0444\u0440\u043e\u0432\u044b\u0435 \u0441\u0438\u043c\u0432\u043e\u043b\u044b\n" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u041a\u0432\u0430\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b, \u0436\u0430\u0434\u043d\u044b\u0435 \u0438 \u043d\u0435 \u043e\u0447\u0435\u043d\u044c" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# ? + * {}\n", | |
"l = ['aaa', 'aab', 'abb', 'bbb']\n", | |
"print select(r'a*', l), \"\u0431\u0435\u0440\u0443 \u043b\u044e\u0431\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e \\\"a\\\", \u0434\u0430\u0436\u0435 \u043d\u0443\u043b\u0435\u0432\u043e\u0435!\"\n", | |
"print select(r'a+', l), \"\u043b\u044e\u0431\u043e\u0435 \u043d\u0435\u043d\u0443\u043b\u0435\u0432\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e, \u0431\u0435\u0440\u0443 \u0432\u0441\u0451!\"\n", | |
"print select(r'a*?', l), \"\u0445\u043e\u0447\u0443 0+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043d\u0435 \u0432\u043e\u0437\u044c\u043c\u0443 \u043d\u0438\u0447\u0435\u0433\u043e!)\"\n", | |
"print select(r'a+?', l), \"\u0445\u043e\u0447\u0443 1+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043e\u0434\u043d\u0443 \u0448\u0442\u0443\u043a\u0443)\"\n", | |
"print select(r'a?', l), \"\u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u043e\u0434\u043d\u0443 \u0438\u043b\u0438 \u043d\u0438\u0447\u0435\u0433\u043e!\"\n", | |
"print select(r'a{,2}', l), \"\u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0434\u043e \u0434\u0432\u0443\u0445 \u0448\u0442\u0443\u043a!\"\n", | |
"print select(r'a{1,2}?', l), \"\u0445\u043e\u0447\u0443 \u043e\u0434\u043d\u0443-\u0434\u0432\u0435 \u0448\u0442\u0443\u043a\u0438, \u0432\u043e\u0437\u044c\u043c\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u043e\u0434\u043d\u0443)!\"\n", | |
"print select(r'a{2}', l), \"\u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0440\u043e\u0432\u043d\u043e \u0434\u0432\u0435!\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"['aaa', 'aa', 'a', ''] \u0431\u0435\u0440\u0443 \u043b\u044e\u0431\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e \"a\", \u0434\u0430\u0436\u0435 \u043d\u0443\u043b\u0435\u0432\u043e\u0435!\n", | |
"['aaa', 'aa', 'a'] \u043b\u044e\u0431\u043e\u0435 \u043d\u0435\u043d\u0443\u043b\u0435\u0432\u043e\u0435 \u043a\u043e\u043b-\u0432\u043e, \u0431\u0435\u0440\u0443 \u0432\u0441\u0451!\n", | |
"['', '', '', ''] \u0445\u043e\u0447\u0443 0+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043d\u0435 \u0432\u043e\u0437\u044c\u043c\u0443 \u043d\u0438\u0447\u0435\u0433\u043e!)\n", | |
"['a', 'a', 'a'] \u0445\u043e\u0447\u0443 1+, \u0431\u0435\u0440\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u0442.\u0435. \u043e\u0434\u043d\u0443 \u0448\u0442\u0443\u043a\u0443)\n", | |
"['a', 'a', 'a', ''] \u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u043e\u0434\u043d\u0443 \u0438\u043b\u0438 \u043d\u0438\u0447\u0435\u0433\u043e!\n", | |
"['aa', 'aa', 'a', ''] \u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0434\u043e \u0434\u0432\u0443\u0445 \u0448\u0442\u0443\u043a!\n", | |
"['a', 'a', 'a'] \u0445\u043e\u0447\u0443 \u043e\u0434\u043d\u0443-\u0434\u0432\u0435 \u0448\u0442\u0443\u043a\u0438, \u0432\u043e\u0437\u044c\u043c\u0443 \u043c\u0438\u043d\u0438\u043c\u0443\u043c (\u043e\u0434\u043d\u0443)!\n", | |
"['aa', 'aa'] \u0445\u043e\u0447\u0443 (\u0438 \u0431\u0435\u0440\u0443) \u0440\u043e\u0432\u043d\u043e \u0434\u0432\u0435!\n" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u042f\u043a\u043e\u0440\u044f" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# ^ $ \\b \\B\n", | |
"s = 'aa,a ab aa aaa'\n", | |
"print re.findall(r'a+', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0435, \u043d\u043e \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043d\u0435\u0432\u0435\u0440\u043d\u044b\u0435 \u0432\u044b\u0431\u043e\u0440\u043a\u0438!\"\n", | |
"print re.findall(r'\\Wa+\\W', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438\"\n", | |
"print re.findall(r'\\sa+\\s', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438\"\n", | |
"print re.findall(r'\\ba+\\b', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u0433\u0440\u0430\u043d\u0438\u0446\u0430\u043c \u0441\u043b\u043e\u0432, \u043d\u0435 \u0431\u0435\u0440\u0443 \u0441\u0430\u043c\u0438 \u0433\u0440\u0430\u043d\u0438\u0446\u044b!\"\n", | |
"print re.findall(r'^a+', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043d\u0430\u0447\u0430\u043b\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\"\n", | |
"print re.findall(r'a+$', s), \"\u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043a\u043e\u043d\u0446\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"['aa', 'a', 'a', 'aa', 'aaa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0435, \u043d\u043e \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043d\u0435\u0432\u0435\u0440\u043d\u044b\u0435 \u0432\u044b\u0431\u043e\u0440\u043a\u0438!\n", | |
"[',a ', ' aa '] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043d\u0435\u0431\u0443\u043a\u0432\u0430\u043c\u0438\n", | |
"[' aa '] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438 \u043f\u043e \u0431\u043e\u043a\u0430\u043c \u0438 \u0431\u0435\u0440\u0443 \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043f\u0440\u043e\u0431\u0435\u043b\u0430\u043c\u0438\n", | |
"['aa', 'a', 'aa', 'aaa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u0430 \u043f\u043e \u0433\u0440\u0430\u043d\u0438\u0446\u0430\u043c \u0441\u043b\u043e\u0432, \u043d\u0435 \u0431\u0435\u0440\u0443 \u0441\u0430\u043c\u0438 \u0433\u0440\u0430\u043d\u0438\u0446\u044b!\n", | |
"['aa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043d\u0430\u0447\u0430\u043b\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\n", | |
"['aaa'] \u0432\u044b\u0431\u0438\u0440\u0430\u044e \u0441\u043b\u043e\u0432\u043e \u0432 \u043a\u043e\u043d\u0446\u0435 \u0441\u0442\u0440\u043e\u043a\u0438\n" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u0413\u0440\u0443\u043f\u043f\u0438\u0440\u043e\u0432\u043a\u0430 \u0441 \u043e\u0431\u0440\u0430\u0442\u043d\u043e\u0439 \u0441\u0432\u044f\u0437\u044c\u044e \u0438 \u0431\u0435\u0437" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# () (?:) (?>) (?P<x>...) \\1 \\2 \\3\n", | |
"re.match(r'(a)\\s*(\\d+),?(c)', 'a121233c').groups()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"('a', '121233', 'c')" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print re.match(\n", | |
" r'^([0-9a-fA-F]{1,2})([0-9a-fA-F]{1,2})([0-9a-fA-F]{1,2})$',\n", | |
" 'ff00bb'\n", | |
").groups()\n", | |
"\n", | |
"# \u0432\u0430\u0436\u043d\u043e! \u043a\u0432\u0430\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b \u0440\u0430\u0431\u043e\u0442\u0430\u044e\u0442 \u0434\u043b\u044f \u0433\u0440\u0443\u043f\u043f \u043f\u0440\u0438 \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0435,\n", | |
"# \u043d\u043e \u0433\u0440\u0443\u043f\u043f\u044b \u043a\u043e\u043f\u0438\u0440\u0443\u044e\u0442\u0441\u044f \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u0438\u043d\u0434\u0435\u043a\u0441\u0430\u043c\u0438, \u043f\u043e\u044d\u0442\u043e\u043c\u0443 \u0432 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u043f\u043e\u043f\u0430\u0434\u0451\u0442 \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0430, \u0437\u0430\u0445\u0432\u0430\u0447\u0435\u043d\u043d\u0430\u044f \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0439 \u043a\u043e\u043f\u0438\u0435\u0439!\n", | |
"print re.match(\n", | |
" r'^([0-9a-fA-F]{1,2}){3}$',\n", | |
" 'ff00bb'\n", | |
").groups(), \"\u0421\u0442\u0440\u043e\u043a\u0430 \u0441\u043c\u0430\u0442\u0447\u0438\u043b\u0430\u0441\u044c, \u043d\u043e \u0433\u0440\u0443\u043f\u043f\u0430 \u043e\u0441\u0442\u0430\u043b\u0430\u0441\u044c \u043e\u0434\u043d\u0430!\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"('ff', '00', 'bb')\n", | |
"('bb',) \u0421\u0442\u0440\u043e\u043a\u0430 \u0441\u043c\u0430\u0442\u0447\u0438\u043b\u0430\u0441\u044c, \u043d\u043e \u0433\u0440\u0443\u043f\u043f\u0430 \u043e\u0441\u0442\u0430\u043b\u0430\u0441\u044c \u043e\u0434\u043d\u0430!\n" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"re.findall(r'([\\\"\\']+)(\\d+)\\1', '\"123\" \\'1\\' \"7\\' \"\"4\"\" \"\"\"42\"\"\"')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": [ | |
"[('\"', '123'), (\"'\", '1'), ('\"\"', '4'), ('\"\"\"', '42')]" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"re.match(r'^(.?)(.?)(.?).?\\3\\2\\1$', 'acbbca').groups()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"('a', 'c', 'b')" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"re.match(r'\\s*(?P<x>\\d+)\\s*,\\s*(?P<y>\\d+)\\s*', '1,2'\n", | |
" ).groupdict()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
"{'x': '1', 'y': '2'}" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u041f\u0435\u0440\u0435\u0447\u0438\u0441\u043b\u0435\u043d\u0438\u044f" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# a|b" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print re.match(r'^((?:\\d+)|(?:[VXI]+))$', '123').groups()\n", | |
"print re.match(r'^((?:\\d+)|(?:[VXI]+))$', 'VII').groups()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"('123',)\n", | |
"('VII',)\n" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u041a\u043e\u043c\u043c\u0435\u043d\u0442\u0430\u0440\u0438\u0438" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# (?#)\n", | |
"re.match(\n", | |
" (\n", | |
" r'^'\n", | |
" r'(?#red)([0-9a-fA-F]{1,2})'\n", | |
" r'(?#green)([0-9a-fA-F]{1,2})'\n", | |
" r'(?#green)([0-9a-fA-F]{1,2})'\n", | |
" r'$'\n", | |
" ),\n", | |
" r'ff00ff'\n", | |
").groups()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 13, | |
"text": [ | |
"('ff', '00', 'ff')" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u041c\u043e\u0434\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# (?ismx)\n", | |
"m(r'(?i)a', 'A')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
"'A'" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"m(r'''(?x)\n", | |
" ^ # begin of line\n", | |
" ([0-9a-fA-F]{1,2}) # red\n", | |
" ([0-9a-fA-F]{1,2}) # green\n", | |
" ([0-9a-fA-F]{1,2}) # blue\n", | |
" $ # end of line\n", | |
" ''', 'ff00ff')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 15, | |
"text": [ | |
"'ff00ff'" | |
] | |
} | |
], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"s = 'a\\nab\\nabc'\n", | |
"print \"\u043f\u043e-\u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e \u0442\u043e\u0447\u043a\u0430 \u043d\u0435 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a, \u0430 ^ \u0438 $ \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0432\u0441\u0435\u0433\u043e \u0442\u0435\u043a\u0441\u0442\u0430:\\n\", (\n", | |
" re.findall(r'^.+$', s))\n", | |
"print \"^ \u0438 $ \u0442\u0435\u043f\u0435\u0440\u044c \u043e\u0431\u0440\u0430\u0431\u0430\u0442\u044b\u0432\u0430\u044e\u0442 \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0438:\\n\", (\n", | |
" re.findall(r'(?m)^.+$', s))\n", | |
"print \"\u0442\u043e\u0447\u043a\u0430 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a:\\n\", (\n", | |
" re.findall(r'(?s)^.+$', s))\n", | |
"print \"\\A\\Z \u0432\u0441\u0435\u0433\u0434\u0430 \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0442\u0435\u043a\u0441\u0442\u0430:\"\n", | |
"print re.findall(r'\\A.+\\Z', s)\n", | |
"print re.findall(r'(?m)\\A.+\\Z', s)\n", | |
"print re.findall(r'(?s)\\A.+\\Z', s)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\u043f\u043e-\u0443\u043c\u043e\u043b\u0447\u0430\u043d\u0438\u044e \u0442\u043e\u0447\u043a\u0430 \u043d\u0435 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a, \u0430 ^ \u0438 $ \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0432\u0441\u0435\u0433\u043e \u0442\u0435\u043a\u0441\u0442\u0430:\n", | |
"[]\n", | |
"^ \u0438 $ \u0442\u0435\u043f\u0435\u0440\u044c \u043e\u0431\u0440\u0430\u0431\u0430\u0442\u044b\u0432\u0430\u044e\u0442 \u043f\u043e\u0434\u0441\u0442\u0440\u043e\u043a\u0438:\n", | |
"['a', 'ab', 'abc']\n", | |
"\u0442\u043e\u0447\u043a\u0430 \u0437\u0430\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u0438 \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u044b \u0441\u0442\u0440\u043e\u043a:\n", | |
"['a\\nab\\nabc']\n", | |
"\\A\\Z \u0432\u0441\u0435\u0433\u0434\u0430 \u043e\u0431\u043e\u0437\u043d\u0430\u0447\u0430\u044e\u0442 \u0433\u0440\u0430\u043d\u0438\u0446\u044b \u0442\u0435\u043a\u0441\u0442\u0430:\n", | |
"[]\n", | |
"[]\n", | |
"['a\\nab\\nabc']\n" | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### \u0417\u0430\u0433\u043b\u044f\u0434\u044b\u0432\u0430\u043d\u0438\u0435 \u0432\u043f\u0435\u0440\u0435\u0434 \u0438 \u043d\u0430\u0437\u0430\u0434" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# (?=...)(?!...)(?<=...)(?<!...)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 17 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"re.findall(r'(aaa(?=b))|(bbb(?=a))', 'aaabbbaaaccc')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 18, | |
"text": [ | |
"[('aaa', ''), ('', 'bbb')]" | |
] | |
} | |
], | |
"prompt_number": 18 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"re.findall(r'(aaa)(?:b)|(bbb)(?:a)', 'aaabbbaaabbb')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 19, | |
"text": [ | |
"[('aaa', ''), ('aaa', '')]" | |
] | |
} | |
], | |
"prompt_number": 19 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## \u041f\u043e\u0438\u0441\u043a/\u0417\u0430\u043c\u0435\u043d\u0430" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# \\1 \\g<n>" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 20 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print re.sub(\n", | |
" r\"(?P<name>\\w)\\s*=\\s*(?P<val>(?:\\d+)|(?:\\S+))\",\n", | |
" r\"'\\g<name>': '\\g<val>'\",\n", | |
" \"x = asd\"\n", | |
")" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"'x': 'asd'\n" | |
] | |
} | |
], | |
"prompt_number": 21 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment