Last active
December 24, 2015 05:49
-
-
Save BenLangmead/6753277 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from bisect import bisect_left, bisect_right # for binary search\n", | |
"import random\n", | |
"random.seed(77) # so that you and I get same random lists" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"a = [1, 2, 3, 3, 3, 4, 5]\n", | |
"bisect_left(a, 3), bisect_right(a, 3)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"(2, 5)" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"a = [2, 4, 6, 8, 10]\n", | |
"bisect_left(a, 5), bisect_right(a, 5)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"(2, 2)" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# make a list of pseudo-random integers in [1, 99]\n", | |
"ls = [ random.randint(1, 99) for _ in xrange(30) ]\n", | |
"ls # show it" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
"[80,\n", | |
" 33,\n", | |
" 24,\n", | |
" 82,\n", | |
" 12,\n", | |
" 48,\n", | |
" 56,\n", | |
" 61,\n", | |
" 15,\n", | |
" 63,\n", | |
" 50,\n", | |
" 85,\n", | |
" 3,\n", | |
" 95,\n", | |
" 18,\n", | |
" 81,\n", | |
" 51,\n", | |
" 83,\n", | |
" 19,\n", | |
" 32,\n", | |
" 9,\n", | |
" 78,\n", | |
" 53,\n", | |
" 21,\n", | |
" 19,\n", | |
" 9,\n", | |
" 20,\n", | |
" 56,\n", | |
" 44,\n", | |
" 70]" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ls.sort() # sort the list in-place\n", | |
"ls # show the sorted list; note there are some repeated elements" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": [ | |
"[3,\n", | |
" 9,\n", | |
" 9,\n", | |
" 12,\n", | |
" 15,\n", | |
" 18,\n", | |
" 19,\n", | |
" 19,\n", | |
" 20,\n", | |
" 21,\n", | |
" 24,\n", | |
" 32,\n", | |
" 33,\n", | |
" 44,\n", | |
" 48,\n", | |
" 50,\n", | |
" 51,\n", | |
" 53,\n", | |
" 56,\n", | |
" 56,\n", | |
" 61,\n", | |
" 63,\n", | |
" 70,\n", | |
" 78,\n", | |
" 80,\n", | |
" 81,\n", | |
" 82,\n", | |
" 83,\n", | |
" 85,\n", | |
" 95]" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# The number 40 is not in the sorted list. If it were, where would it go?\n", | |
"bisect_left(ls, 40)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"13" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# insert it\n", | |
"ls.insert(13, 40)\n", | |
"ls # show the new list" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"[3,\n", | |
" 9,\n", | |
" 9,\n", | |
" 12,\n", | |
" 15,\n", | |
" 18,\n", | |
" 19,\n", | |
" 19,\n", | |
" 20,\n", | |
" 21,\n", | |
" 24,\n", | |
" 32,\n", | |
" 33,\n", | |
" 40,\n", | |
" 44,\n", | |
" 48,\n", | |
" 50,\n", | |
" 51,\n", | |
" 53,\n", | |
" 56,\n", | |
" 56,\n", | |
" 61,\n", | |
" 63,\n", | |
" 70,\n", | |
" 78,\n", | |
" 80,\n", | |
" 81,\n", | |
" 82,\n", | |
" 83,\n", | |
" 85,\n", | |
" 95]" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# The number 19 appears in the list multiple times.\n", | |
"# Say we want a range [st, en) where st is the first element\n", | |
"# (inclusive) and en is last element (exclusive) that contains a 19\n", | |
"st, en = bisect_left(ls, 19), bisect_right(ls, 19)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"st, en" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"(6, 8)" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ls[st:en]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
"[19, 19]" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Of course, there also exists a total ordering over strings\n", | |
"# (lexicographical ordering), so we can do all the same things\n", | |
"# with strings\n", | |
"strls = ['a', 'awkward', 'awl', 'awls', 'axe', 'axes', 'bee']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"strls == sorted(strls)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 12, | |
"text": [ | |
"True" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# We want to get the range of elements that equal a query string:\n", | |
"st, en = bisect_left(strls, 'awl'), bisect_right(strls, 'awl')\n", | |
"st, en" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 13, | |
"text": [ | |
"(2, 3)" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Say we want to get the range of elements that have some prefix,\n", | |
"# e.g. 'aw' and say that 'z' is the lexicographically greatest\n", | |
"# character in the alphabet.\n", | |
"st, en = bisect_left(strls, 'aw'), bisect_left(strls, 'ax')\n", | |
"st, en" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
"(1, 4)" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"strls[st:en]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 15, | |
"text": [ | |
"['awkward', 'awl', 'awls']" | |
] | |
} | |
], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# If we can do it for any sorted list of strings, we can do it for\n", | |
"# a sorted list of suffixes\n", | |
"t = 'abaaba$'\n", | |
"suffixes = sorted([t[i:] for i in xrange(len(t))])\n", | |
"suffixes" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 16, | |
"text": [ | |
"['$', 'a$', 'aaba$', 'aba$', 'abaaba$', 'ba$', 'baaba$']" | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"st, en = bisect_left(suffixes, 'aba'), bisect_right(suffixes, 'abaz')\n", | |
"st, en" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 17, | |
"text": [ | |
"(3, 5)" | |
] | |
} | |
], | |
"prompt_number": 17 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 17 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment