Skip to content

Instantly share code, notes, and snippets.

@BenLangmead
Last active December 24, 2015 05:49
Show Gist options
  • Save BenLangmead/6753277 to your computer and use it in GitHub Desktop.
Save BenLangmead/6753277 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from bisect import bisect_left, bisect_right # for binary search\n",
"import random\n",
"random.seed(77) # so that you and I get same random lists"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = [1, 2, 3, 3, 3, 4, 5]\n",
"bisect_left(a, 3), bisect_right(a, 3)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"(2, 5)"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = [2, 4, 6, 8, 10]\n",
"bisect_left(a, 5), bisect_right(a, 5)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"(2, 2)"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# make a list of pseudo-random integers in [1, 99]\n",
"ls = [ random.randint(1, 99) for _ in xrange(30) ]\n",
"ls # show it"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"[80,\n",
" 33,\n",
" 24,\n",
" 82,\n",
" 12,\n",
" 48,\n",
" 56,\n",
" 61,\n",
" 15,\n",
" 63,\n",
" 50,\n",
" 85,\n",
" 3,\n",
" 95,\n",
" 18,\n",
" 81,\n",
" 51,\n",
" 83,\n",
" 19,\n",
" 32,\n",
" 9,\n",
" 78,\n",
" 53,\n",
" 21,\n",
" 19,\n",
" 9,\n",
" 20,\n",
" 56,\n",
" 44,\n",
" 70]"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ls.sort() # sort the list in-place\n",
"ls # show the sorted list; note there are some repeated elements"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"[3,\n",
" 9,\n",
" 9,\n",
" 12,\n",
" 15,\n",
" 18,\n",
" 19,\n",
" 19,\n",
" 20,\n",
" 21,\n",
" 24,\n",
" 32,\n",
" 33,\n",
" 44,\n",
" 48,\n",
" 50,\n",
" 51,\n",
" 53,\n",
" 56,\n",
" 56,\n",
" 61,\n",
" 63,\n",
" 70,\n",
" 78,\n",
" 80,\n",
" 81,\n",
" 82,\n",
" 83,\n",
" 85,\n",
" 95]"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# The number 40 is not in the sorted list. If it were, where would it go?\n",
"bisect_left(ls, 40)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"13"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# insert it\n",
"ls.insert(13, 40)\n",
"ls # show the new list"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"[3,\n",
" 9,\n",
" 9,\n",
" 12,\n",
" 15,\n",
" 18,\n",
" 19,\n",
" 19,\n",
" 20,\n",
" 21,\n",
" 24,\n",
" 32,\n",
" 33,\n",
" 40,\n",
" 44,\n",
" 48,\n",
" 50,\n",
" 51,\n",
" 53,\n",
" 56,\n",
" 56,\n",
" 61,\n",
" 63,\n",
" 70,\n",
" 78,\n",
" 80,\n",
" 81,\n",
" 82,\n",
" 83,\n",
" 85,\n",
" 95]"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# The number 19 appears in the list multiple times.\n",
"# Say we want a range [st, en) where st is the first element\n",
"# (inclusive) and en is last element (exclusive) that contains a 19\n",
"st, en = bisect_left(ls, 19), bisect_right(ls, 19)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"st, en"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"(6, 8)"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ls[st:en]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"[19, 19]"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Of course, there also exists a total ordering over strings\n",
"# (lexicographical ordering), so we can do all the same things\n",
"# with strings\n",
"strls = ['a', 'awkward', 'awl', 'awls', 'axe', 'axes', 'bee']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"strls == sorted(strls)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"True"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# We want to get the range of elements that equal a query string:\n",
"st, en = bisect_left(strls, 'awl'), bisect_right(strls, 'awl')\n",
"st, en"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"(2, 3)"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Say we want to get the range of elements that have some prefix,\n",
"# e.g. 'aw' and say that 'z' is the lexicographically greatest\n",
"# character in the alphabet.\n",
"st, en = bisect_left(strls, 'aw'), bisect_left(strls, 'ax')\n",
"st, en"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"(1, 4)"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"strls[st:en]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 15,
"text": [
"['awkward', 'awl', 'awls']"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# If we can do it for any sorted list of strings, we can do it for\n",
"# a sorted list of suffixes\n",
"t = 'abaaba$'\n",
"suffixes = sorted([t[i:] for i in xrange(len(t))])\n",
"suffixes"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 16,
"text": [
"['$', 'a$', 'aaba$', 'aba$', 'abaaba$', 'ba$', 'baaba$']"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"st, en = bisect_left(suffixes, 'aba'), bisect_right(suffixes, 'abaz')\n",
"st, en"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 17,
"text": [
"(3, 5)"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment