This is merely a gist of the great talk given by Raymond Hettinger (@raymondh) at PyCon 2013. The original video can be enjoyed here and the original slides can be found there.
Almost all code snippets present two or three ways of doing things: bad, ugly and pythonic :) I changed the code a bit to make it Python 3 compatible and added some notes regarding the Python 2/3 transition.
- Loops
- When you see this, do that instead
- Looping over a range of numbers
- Looping over a collection
- Looping backwards
- Looping over a collection and indices
- Looping over two collections
- Looping in sorted order
- Custom sort order
- Call a function until a sentinel value
- Distinguishing multiple exit points in loops
- Dictionary Skills
- Improving Clarity
- (Un)packing
- Efficiency
- Decorators and Context Managers
- Concise Expressive One-Liners
- Replace traditional index manipulation with Python’s core looping idioms
- Learn advanced techniques with for-else clauses and the two argument form of iter()
- Improve your craftsmanship and aim for clean, fast, idiomatic Python code
If you mutate something you're iterating over, you're living in a state of sin and deserve whatever happens to you. — Raymond Hettinger
# Code below could use some improvement: the numbers are spelled out by hand
for i in [0, 1, 2, 3, 4, 5]:
    print(i**2)

# This one is faster & better
for i in range(6):
    print(i**2)

# This was kinda best in Python 2, but in Python 3 xrange() got renamed to
# range(). Alias the name so the snippet runs on both versions.
try:
    xrange
except NameError:  # Python 3: the name no longer exists
    xrange = range

for i in xrange(6):
    print(i**2)
colors = ['red', 'green', 'blue', 'yellow']

# Copy-pasted straight from PyUglyCodeConf slides :)
for i in range(len(colors)):
    print(colors[i])

# and reworked in a pythonic way: iterate over the items themselves
for color in colors:
    print(color)
colors = ['red', 'green', 'blue', 'yellow']

# Another piece of delight to get rid of: counting indices down by hand
for i in range(len(colors)-1, -1, -1):
    print(colors[i])

# To make things pythonic
for color in reversed(colors):
    print(color)
colors = ['red', 'green', 'blue', 'yellow']

# Python was a lot of questionable fun back in the day
for i in range(len(colors)):
    print(i, '-->', colors[i])

# Now it's about using the right tool for the task at hand
for i, color in enumerate(colors):
    print(i, '-->', color)
names = ['raymond', 'rachel', 'matthew']
colors = ['red', 'green', 'blue', 'yellow']

# Index-based way: stop at the length of the shorter list
n = min(len(names), len(colors))
for i in range(n):
    print(names[i], '-->', colors[i])

# zip() pairs the items up and stops at the shorter input
for name, color in zip(names, colors):
    print(name, '-->', color)

# Note: in Python 2 zip computes the whole list at once, while izip computes
# the elements only when requested.
# In Python 3 zip returns an iterator pretty much like the old izip, so the
# name is aliased to keep this snippet runnable on both versions.
try:
    from itertools import izip  # Python 2
except ImportError:
    izip = zip  # Python 3

for name, color in izip(names, colors):
    print(name, '-->', color)
colors = ['red', 'green', 'blue', 'yellow']

# sorted() returns a new list; colors itself is left untouched
for color in sorted(colors):
    print(color)

# Same thing in descending order
for color in sorted(colors, reverse=True):
    print(color)
from functools import cmp_to_key

colors = ['red', 'green', 'blue', 'yellow']


def compare_length(c1, c2):
    """Old-style comparison function ordering two items by their length.

    Returns -1 if c1 is shorter than c2, 1 if it is longer, and 0 when both
    have the same length.
    """
    if len(c1) < len(c2):
        return -1
    if len(c1) > len(c2):
        return 1
    return 0


# Python 2 spelled this sorted(colors, cmp=compare_length). The cmp argument
# was removed in Python 3, so the comparison function is wrapped with
# cmp_to_key (available since Python 2.7 / 3.2).
print(sorted(colors, key=cmp_to_key(compare_length)))

# Better: a key function is called once per item and is shorter to write
print(sorted(colors, key=len))
# NOTE(review): `f` is not defined in this snippet; it is presumably a file
# object opened in text mode, where read() returns '' at end of file.

# Traditional way: loop forever and break out on the sentinel value
blocks = []
while True:
    block = f.read(32)
    if block == '':
        break
    blocks.append(block)

# Better: the two-argument form of iter() keeps calling the function and
# stops as soon as it returns the sentinel ('')
from functools import partial

blocks = []
for block in iter(partial(f.read, 32), ''):
    blocks.append(block)
def find(seq, target):
    """Return the index of the first item in seq equal to target, or -1.

    Flag-variable version: `found` records whether the loop was left through
    `break` (match) or by running out of items (no match).
    """
    found = False
    for i, value in enumerate(seq):
        if value == target:
            found = True
            break
    if not found:
        return -1
    return i
def find(seq, target):
    """Return the index of the first item in seq equal to target, or -1.

    for-else version: the `else` suite runs only when the loop finishes
    without hitting `break`, i.e. when no item matched.
    """
    for i, value in enumerate(seq):
        if value == target:
            break
    else:
        return -1
    return i
There's two kinds of people in the world: people who've mastered dictionaries and total goobers. — Raymond Hettinger
- Mastering dictionaries is a fundamental Python skill
- They are fundamental for expressing relationships, linking, counting, and grouping
d = {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}

# Plain iteration over a dict yields its keys
for k in d:
    print(k)

# Deleting while looping needs a snapshot of the keys. In Python 2 d.keys()
# returned a list copy; in Python 3 it is a live view, and deleting from the
# dict while iterating over it raises RuntimeError. list() works on both.
for k in list(d.keys()):
    if k.startswith('r'):
        del d[k]

# Alternative: build a new dict with only the entries to keep
d = {k: d[k] for k in d if not k.startswith('r')}
# Works, but looks the value up again on every iteration
for k in d:
    print(k, '-->', d[k])

# Note: in Python 3 this works pretty much like the next snippet
for k, v in d.items():
    print(k, '-->', v)

# Python 2 only: iteritems() was the lazy variant of items(). The method no
# longer exists in Python 3, so it is only used when the dict provides it.
if hasattr(d, 'iteritems'):
    for k, v in d.iteritems():
        print(k, '-->', v)
names = ['raymond', 'rachel', 'matthew']
colors = ['red', 'green', 'blue']

# Change zip to izip in case of Python 2
d = dict(zip(names, colors))
#> {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}

# enumerate() yields (index, item) pairs, so the indices become the keys
d = dict(enumerate(names))
#> {0: 'raymond', 1: 'rachel', 2: 'matthew'}
colors = ['red', 'green', 'red', 'blue', 'green', 'red']

# Simple, basic way to count: initialise a missing key before incrementing
d = {}
for color in colors:
    if color not in d:
        d[color] = 0
    d[color] += 1
#> {'blue': 1, 'green': 2, 'red': 3}

# Better: get() supplies 0 for keys that are not there yet
d = {}
for color in colors:
    d[color] = d.get(color, 0) + 1

# Slightly more modern: defaultdict(int) creates missing entries as 0
from collections import defaultdict

d = defaultdict(int)
for color in colors:
    d[color] += 1
from collections import defaultdict

names = ['raymond', 'rachel', 'matthew', 'roger',
         'betty', 'melissa', 'judith', 'charlie']

# Group the names by length: create the list for a new key by hand
d = {}
for name in names:
    key = len(name)
    if key not in d:
        d[key] = []
    d[key].append(name)
#> {5: ['roger', 'betty'], 6: ['rachel', 'judith'],
#>  7: ['raymond', 'matthew', 'melissa', 'charlie']}

# Better: setdefault() inserts the empty list when the key is missing and
# returns the stored list either way
d = {}
for name in names:
    key = len(name)
    d.setdefault(key, []).append(name)

# Even better: defaultdict(list) creates the empty list on first access
d = defaultdict(list)
for name in names:
    key = len(name)
    d[key].append(name)
d = {'matthew': 'blue', 'rachel': 'green', 'raymond': 'red'}

# popitem() removes and returns one (key, value) pair per call, so the loop
# ends once the dict has been emptied
while d:
    key, value = d.popitem()
    print(key, '-->', value)
import argparse
import os
from collections import ChainMap

defaults = {'color': 'red', 'user': 'guest'}
parser = argparse.ArgumentParser()
parser.add_argument('-u', '--user')
parser.add_argument('-c', '--color')
namespace = parser.parse_args([])
# Keep only the options that were actually given (unset ones are None)
command_line_args = {k: v for k, v in vars(namespace).items() if v}

# Common approach: start from the defaults, then let the environment and the
# command line override them. This copies every entry into d.
d = defaults.copy()
d.update(os.environ)
d.update(command_line_args)

# Better: ChainMap searches the mappings in order (command line first,
# defaults last) without copying them
d = ChainMap(command_line_args, os.environ, defaults)
- Positional arguments and indices are nice
- Keywords and names are better
- The first way is convenient for the computer
- The second corresponds to how humans think
# Positional style: the reader has to guess what False, 20 and True stand for
twitter_search('@obama', False, 20, True)
# Keyword style: every argument is labelled at the call site
twitter_search('@obama', retweets=False, numtweets=20,
popular=True)
import doctest
from collections import namedtuple

# Old testmod return value: a plain tuple, the meaning of each slot is unclear
doctest.testmod()
#> (0, 4)

# New testmod return value: a named tuple that documents itself
doctest.testmod()
#> TestResults(failed=0, attempted=4)

# To make this happen, a named tuple type with the two fields is all it takes
TestResults = namedtuple('TestResults', ['failed', 'attempted'])
# The e-mail address had been replaced by a scraper's "[email protected]"
# placeholder; a neutral example address is used instead.
p = 'Raymond', 'Hettinger', 0x30, 'python@example.com'

# A common approach in other languages: pick the fields out by index
fname = p[0]
lname = p[1]
age = p[2]
email = p[3]

# Better: tuple unpacking assigns all four names in one statement
fname, lname, age, email = p
def fibonacci(n):
    """Print the first n Fibonacci numbers, one per line.

    Temporary-variable version: `t` holds the old `y` so it can become the
    new `x` after `y` has already been overwritten.
    """
    x = 0
    y = 1
    for i in range(n):
        print(x)
        t = y
        y = x + y
        x = t
def fibonacci(n):
    """Print the first n Fibonacci numbers, one per line.

    Tuple-assignment version: the right-hand side is evaluated completely
    before x and y are rebound, so no temporary variable is needed.
    """
    x, y = 0, 1
    for i in range(n):
        print(x)
        x, y = y, x+y
- Don’t underestimate the advantages of updating state variables at the same time
- It eliminates an entire class of errors due to out‐of‐order updates
- It allows high level thinking: “chunking”
# Step-by-step version: all four new values are computed into temporaries
# first, so each one is derived from the OLD x, y, dx and dy; only then are
# the state variables overwritten.
tmp_x = x + dx * t
tmp_y = y + dy * t
tmp_dx = influence(m, x, y, dx, dy, partial='x')
tmp_dy = influence(m, x, y, dx, dy, partial='y')
x = tmp_x
y = tmp_y
dx = tmp_dx
dy = tmp_dy
# Tuple-assignment version: the whole right-hand tuple is evaluated before
# any of the four names is rebound, so the temporaries are not needed.
x, y, dx, dy = (x + dx * t,
y + dy * t,
influence(m, x, y, dx, dy, partial='x'),
influence(m, x, y, dx, dy, partial='y'))
- An optimization fundamental rule
- Don’t cause data to move around unnecessarily
- It takes only a little care to avoid O(n**2) behavior instead of linear behavior
names = ['raymond', 'rachel', 'matthew', 'roger',
         'betty', 'melissa', 'judith', 'charlie']

# Repeated += creates a new, longer string on every iteration
s = names[0]
for name in names[1:]:
    s += ', ' + name
print(s)

# Better: join() builds the result in one go
print(', '.join(names))
from collections import deque

names = ['raymond', 'rachel', 'matthew', 'roger',
         'betty', 'melissa', 'judith', 'charlie']

# Removing or inserting at the front of a list shifts every remaining item
del names[0]
names.pop(0)
names.insert(0, 'mark')

# Better: a deque supports fast appends and pops at both ends
names = deque(['raymond', 'rachel', 'matthew', 'roger',
               'betty', 'melissa', 'judith', 'charlie'])

del names[0]
names.popleft()
names.appendleft('mark')
- Helps separate business logic from administrative logic
- Clean, beautiful tools for factoring code and improving code reuse
- Good naming is essential.
- Remember the Spiderman rule: With great power, comes great responsibility!
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2


def web_lookup(url, saved={}):
    """Return the content read from url, remembering pages already fetched.

    This version mixes business logic (fetching) with administrative logic
    (caching). The mutable default `saved` is deliberate here: the dict is
    created once and shared by all calls, so it acts as the cache.
    """
    if url in saved:
        return saved[url]
    page = urlopen(url).read()
    saved[url] = page
    return page
from functools import wraps

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2


def cache(func):
    """Decorator that remembers results of func, keyed by positional args.

    Each distinct argument tuple calls func once; later calls with the same
    arguments return the stored result. Arguments must be hashable.
    """
    saved = {}

    @wraps(func)
    def newfunc(*args):
        if args in saved:
            # Serve the stored result. Calling newfunc(*args) again here
            # would hit this same branch forever.
            return saved[args]
        result = func(*args)
        saved[args] = result
        return result

    return newfunc


# The decorator has to be defined before it is applied. With caching factored
# out, only the business logic is left in the function.
@cache
def web_lookup(url):
    return urlopen(url).read()
from decimal import Context, Decimal, getcontext, localcontext, setcontext

# Old-style way: save the context by hand, change it, restore it afterwards
old_context = getcontext().copy()
getcontext().prec = 50
print(Decimal(355) / Decimal(113))
setcontext(old_context)

# New-style way: the with-statement restores the previous context on exit
with localcontext(Context(prec=50)):
    print(Decimal(355) / Decimal(113))
# Old-style way: close the file by hand in a finally clause
f = open('data.txt')
try:
    data = f.read()
finally:
    f.close()

# New-style way: the with-statement closes the file when the block is left
with open('data.txt') as f:
    data = f.read()
import threading

# Make a lock
lock = threading.Lock()

# Old way to use a lock: release it by hand in a finally clause
lock.acquire()
try:
    print('Critical section 1')
    print('Critical section 2')
finally:
    lock.release()

# New way to use a lock: the with-statement acquires and releases it
with lock:
    print('Critical section 1')
    print('Critical section 2')
import os
from contextlib import contextmanager, suppress

# Old and silly way
try:
    os.remove('somefile.tmp')
except OSError:
    pass


# Pythonic way
@contextmanager
def ignored(*exceptions):
    """Context manager that swallows the given exception types.

    Any exception raised inside the with-block that is an instance of one of
    `exceptions` is discarded; every other exception propagates as usual.
    """
    try:
        yield
    except exceptions:
        pass


with ignored(OSError):
    os.remove('somefile.tmp')

# Since Python 3.4 you can use the suppress() context manager instead
# https://docs.python.org/3/library/contextlib.html#contextlib.suppress
with suppress(OSError):
    os.remove('somefile.tmp')
import sys
from contextlib import contextmanager

# Weak old stuff: swap sys.stdout by hand and restore it in a finally clause
with open('help.txt', 'w') as f:
    oldstdout = sys.stdout
    sys.stdout = f
    try:
        help(pow)
    finally:
        sys.stdout = oldstdout


# Cool new stuff
@contextmanager
def redirect_stdout(fileobj):
    """Temporarily send everything written to sys.stdout to fileobj.

    Yields fileobj so it can be bound with `as`. The previous sys.stdout is
    restored when the with-block is left, even if it raises.
    """
    oldstdout = sys.stdout
    sys.stdout = fileobj
    try:
        yield fileobj
    finally:
        sys.stdout = oldstdout


with open('help.txt', 'w') as f:
    with redirect_stdout(f):
        help(pow)
Two conflicting rules:
- Don’t put too much on one line
- Don’t break atoms of thought into subatomic particles
Raymond’s rule:
One logical line of code equals one sentence in English
# Old school: build the list step by step, then add it up
result = []
for i in range(10):
    s = i ** 2
    result.append(s)
print(sum(result))

# New way. Change range() for xrange() in case of Python 2
print(sum([i**2 for i in range(10)]))

# Even faster: a generator expression feeds sum() without building a list
print(sum(i**2 for i in range(10)))