Last active
April 7, 2019 14:13
-
-
Save walkerh/1b58bc4ac555d347d23ffd5be22e6351 to your computer and use it in GitHub Desktop.
Demonstration of grouping by repeating letters in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# `itertools` to the Rescue\n", | |
"\n", | |
"https://docs.python.org/3/library/itertools.html#itertools.groupby\n", | |
"\n", | |
"Given a list of strings made up of capital letters, find (for each string) the longest run of a single letter repeated consecutively, and print that letter along with the length of the run. If several runs tie for longest, choose the letter that comes earliest in the alphabet." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from itertools import groupby" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sample inputs: one all-caps test string per list entry.\n", | |
"strings = [\n", | |
"    \"HELLOTHEREWORLD\",\n", | |
"    \"NOWISTHETIMEFORALLGOOD\",\n", | |
"    \"MEETBEETBOOBOO\",\n", | |
"    \"ZZOEUCUTAAEUCUTTTCUU\",\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['HELLOTHEREWORLD',\n", | |
" 'NOWISTHETIMEFORALLGOOD',\n", | |
" 'MEETBEETBOOBOO',\n", | |
" 'ZZOEUCUTAAEUCUTTTCUU']" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"strings" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"H 1\n", | |
"E 1\n", | |
"L 2\n", | |
"O 1\n", | |
"T 1\n", | |
"H 1\n", | |
"E 1\n", | |
"R 1\n", | |
"E 1\n", | |
"W 1\n", | |
"O 1\n", | |
"R 1\n", | |
"L 1\n", | |
"D 1\n", | |
"\n", | |
"N 1\n", | |
"O 1\n", | |
"W 1\n", | |
"I 1\n", | |
"S 1\n", | |
"T 1\n", | |
"H 1\n", | |
"E 1\n", | |
"T 1\n", | |
"I 1\n", | |
"M 1\n", | |
"E 1\n", | |
"F 1\n", | |
"O 1\n", | |
"R 1\n", | |
"A 1\n", | |
"L 2\n", | |
"G 1\n", | |
"O 2\n", | |
"D 1\n", | |
"\n", | |
"M 1\n", | |
"E 2\n", | |
"T 1\n", | |
"B 1\n", | |
"E 2\n", | |
"T 1\n", | |
"B 1\n", | |
"O 2\n", | |
"B 1\n", | |
"O 2\n", | |
"\n", | |
"Z 2\n", | |
"O 1\n", | |
"E 1\n", | |
"U 1\n", | |
"C 1\n", | |
"U 1\n", | |
"T 1\n", | |
"A 2\n", | |
"E 1\n", | |
"U 1\n", | |
"C 1\n", | |
"U 1\n", | |
"T 3\n", | |
"C 1\n", | |
"U 2\n" | |
] | |
} | |
], | |
"source": [ | |
"# Show every run groupby finds in each string: the letter, then the run length.\n", | |
"for s in strings:\n", | |
"    print()  # blank line separates one string's runs from the next\n", | |
"    for letter, iterator in groupby(s):\n", | |
"        # groupby hands back a lazy iterator per run; counting its items gives the length\n", | |
"        print(letter, sum(1 for _ in iterator))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"[(1, 'D'), (1, 'E'), (1, 'E'), (1, 'E'), (1, 'H'), (1, 'H'), (1, 'L'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'R'), (1, 'T'), (1, 'W'), (2, 'L')]\n", | |
"\n", | |
"[(1, 'A'), (1, 'D'), (1, 'E'), (1, 'E'), (1, 'F'), (1, 'G'), (1, 'H'), (1, 'I'), (1, 'I'), (1, 'M'), (1, 'N'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'S'), (1, 'T'), (1, 'T'), (1, 'W'), (2, 'L'), (2, 'O')]\n", | |
"\n", | |
"[(1, 'B'), (1, 'B'), (1, 'B'), (1, 'M'), (1, 'T'), (1, 'T'), (2, 'E'), (2, 'E'), (2, 'O'), (2, 'O')]\n", | |
"\n", | |
"[(1, 'C'), (1, 'C'), (1, 'C'), (1, 'E'), (1, 'E'), (1, 'O'), (1, 'T'), (1, 'U'), (1, 'U'), (1, 'U'), (1, 'U'), (2, 'A'), (2, 'U'), (2, 'Z'), (3, 'T')]\n" | |
] | |
} | |
], | |
"source": [ | |
"# Build (run length, letter) pairs per string and list them in plain tuple\n", | |
"# order: shortest runs first, equal lengths alphabetically.\n", | |
"for s in strings:\n", | |
"    print()\n", | |
"    print(sorted((len(list(group)), char) for char, group in groupby(s)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def sort_key(t):\n", | |
"    \"\"\"Return the ordering key for a ``(count, letter)`` pair.\n", | |
"\n", | |
"    The count is negated so that an ascending sort puts the largest count\n", | |
"    first; the letter is left as-is so ties fall back to alphabetical order.\n", | |
"    \"\"\"\n", | |
"    run_length, char = t\n", | |
"    return (-run_length, char)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"[(2, 'L'), (1, 'D'), (1, 'E'), (1, 'E'), (1, 'E'), (1, 'H'), (1, 'H'), (1, 'L'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'R'), (1, 'T'), (1, 'W')]\n", | |
"\n", | |
"[(2, 'L'), (2, 'O'), (1, 'A'), (1, 'D'), (1, 'E'), (1, 'E'), (1, 'F'), (1, 'G'), (1, 'H'), (1, 'I'), (1, 'I'), (1, 'M'), (1, 'N'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'S'), (1, 'T'), (1, 'T'), (1, 'W')]\n", | |
"\n", | |
"[(2, 'E'), (2, 'E'), (2, 'O'), (2, 'O'), (1, 'B'), (1, 'B'), (1, 'B'), (1, 'M'), (1, 'T'), (1, 'T')]\n", | |
"\n", | |
"[(3, 'T'), (2, 'A'), (2, 'U'), (2, 'Z'), (1, 'C'), (1, 'C'), (1, 'C'), (1, 'E'), (1, 'E'), (1, 'O'), (1, 'T'), (1, 'U'), (1, 'U'), (1, 'U'), (1, 'U')]\n" | |
] | |
} | |
], | |
"source": [ | |
"# Same (run length, letter) pairs, now ordered with sort_key so the longest\n", | |
"# run comes first and equal lengths are alphabetical.\n", | |
"for s in strings:\n", | |
"    print()\n", | |
"    print(\n", | |
"        sorted(\n", | |
"            ((len(list(group)), char) for char, group in groupby(s)),\n", | |
"            key=sort_key,\n", | |
"        )\n", | |
"    )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 L\n", | |
"2 L\n", | |
"2 E\n", | |
"3 T\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print only the winning (count, letter) pair for each string.\n", | |
"# min() with sort_key picks the same pair that sorted(...)[0] would (the\n", | |
"# first element with the smallest key) in a single pass, without sorting\n", | |
"# every run.\n", | |
"for s in strings:\n", | |
"    print(\n", | |
"        *min(\n", | |
"            ((sum(1 for _ in iterator), letter) for letter, iterator in groupby(s)),\n", | |
"            key=sort_key,\n", | |
"        )\n", | |
"    )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2 L HELLOTHEREWORLD\n", | |
"2 L NOWISTHETIMEFORALLGOOD\n", | |
"2 E MEETBEETBOOBOO\n", | |
"3 T ZZOEUCUTAAEUCUTTTCUU\n" | |
] | |
} | |
], | |
"source": [ | |
"# Final answer: the winning (count, letter) pair followed by the string it\n", | |
"# came from. min() with sort_key returns the longest run, ties broken\n", | |
"# alphabetically, in one pass instead of sorting all runs and taking [0].\n", | |
"for s in strings:\n", | |
"    print(\n", | |
"        *min(\n", | |
"            ((sum(1 for _ in iterator), letter) for letter, iterator in groupby(s)),\n", | |
"            key=sort_key,\n", | |
"        ),\n", | |
"        s\n", | |
"    )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment