Created
          August 11, 2011 16:54 
        
      - 
      
 - 
        
Save heavenshell/1140168 to your computer and use it in GitHub Desktop.  
    Benchmark re.sub() vs. str.replace()
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| import re | |
| from benchmarker import Benchmarker, cmdopt | |
def replace_reserved_chars(value):
    """
    Replace reserved chars to '-' using chained ``str.replace()`` calls.

    Each of the 22 reserved characters (URL delimiters, brackets, quotes,
    backslash, backtick and the ASCII space) is replaced one at a time.
    No other character is touched; tabs and newlines are left as-is.

    :param value: Replace string
    :return: Copy of ``value`` with every reserved char turned into '-'
    """
    # Parenthesised chain instead of backslash continuations; the calls are
    # kept explicit on purpose because this is the str.replace() side of the
    # benchmark and must not be swapped for translate() or a regex.
    ret = (
        value.replace(';', '-')
        .replace('/', '-')
        .replace('?', '-')
        .replace(':', '-')
        .replace('@', '-')
        .replace('&', '-')
        .replace('=', '-')
        .replace('+', '-')
        .replace(',', '-')
        .replace('(', '-')
        .replace(')', '-')
        .replace('[', '-')
        .replace(']', '-')
        .replace('!', '-')
        .replace('"', '-')
        .replace('#', '-')
        .replace('%', '-')
        .replace("'", '-')
        .replace('~', '-')
        .replace('\\', '-')
        .replace('`', '-')
        .replace(' ', '-')
    )
    return ret
def replace_reserved_chars_regex(value):
    """
    Replace reserved chars to '-' using a single ``re.sub()`` call.

    The pattern is one character class listing exactly the 22 reserved
    characters, ASCII space included. Tabs, newlines and other whitespace
    are NOT matched, so the result agrees with the chained
    ``str.replace()`` implementation for every input.

    :param value: Replace string
    :return: Copy of ``value`` with every reserved char turned into '-'
    """
    # The pattern string is passed to re.sub() on every call on purpose:
    # this is the "uncompiled" variant of the benchmark.
    pattern = r'[ ;/?:@&=+,()\[\]!"#%\'~\\`]'
    return re.sub(pattern, '-', value)
# Compiled once at import time so the benchmarked function measures only the
# substitution, not a pattern compile / cache lookup on every call.
# The class lists exactly the 22 reserved characters (ASCII space included).
_RESERVED_CHARS_RE = re.compile(r'[ ;/?:@&=+,()\[\]!"#%\'~\\`]')


def replace_reserved_chars_regex_compile(value):
    """
    Replace reserved chars to '-' using a precompiled regular expression.

    Matches the same 22 characters as the chained ``str.replace()``
    implementation; tabs, newlines and other whitespace are left untouched.

    :param value: Replace string
    :return: Copy of ``value`` with every reserved char turned into '-'
    """
    return _RESERVED_CHARS_RE.sub('-', value)
# Let benchmarker process its own command-line options before running.
cmdopt.parse()

# One entry per benchmark scenario: (label suffix, input text).
#   ' one'  - short ASCII text containing a single reserved char (the space)
#   ''      - mixed Japanese/ASCII title with several reserved chars
#   ' all'  - every reserved char exactly once ('=' was missing originally)
#   ' none' - text with no reserved chars, so nothing gets replaced
_BENCHMARK_CASES = (
    (' one', 'Hello world'),
    ('', u'エメラルド Tour 2010(初回限定盤) [DVD]'),
    (' all', ' ;/?:@&=+,()[]!"#%\'~\\`'),
    (' none', u'エメラルド-Tour-2010-初回限定盤-DVD'),
)

# Each scenario gets its own Benchmarker run so its three implementations
# are reported together, in the same order as before:
# str.replace chain, re.sub, precompiled regex.
for _suffix, _text in _BENCHMARK_CASES:
    with Benchmarker(width=20, loop=100*1000) as bm:
        # NOTE(review): presumably records the empty-loop baseline that
        # benchmarker subtracts from the timings - confirm against its docs.
        for _ in bm.empty():
            pass
        for _ in bm('replace' + _suffix):
            replace_reserved_chars(_text)
        for _ in bm('re' + _suffix):
            replace_reserved_chars_regex(_text)
        for _ in bm('re compile' + _suffix):
            replace_reserved_chars_regex_compile(_text)
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment